... | ... |
@@ -27,6 +27,7 @@ Description: The software formalises a framework for classification in R. |
27 | 27 |
License: GPL-3 |
28 | 28 |
Packaged: 2014-10-18 11:16:55 UTC; dario |
29 | 29 |
RoxygenNote: 7.2.0 |
30 |
+SystemRequirements: C++14 |
|
30 | 31 |
Collate: |
31 | 32 |
'ROCplot.R' |
32 | 33 |
'available.R' |
... | ... |
@@ -60,6 +61,7 @@ Collate: |
60 | 61 |
'interfaceSVM.R' |
61 | 62 |
'performancePlot.R' |
62 | 63 |
'plotFeatureClasses.R' |
64 |
+ 'prepareData.R' |
|
63 | 65 |
'previousSelection.R' |
64 | 66 |
'previousTrained.R' |
65 | 67 |
'rankingBartlett.R' |
66 | 68 |
deleted file mode 100644 |
... | ... |
@@ -1,187 +0,0 @@ |
1 |
-# Generated by roxygen2: do not edit by hand |
|
2 |
- |
|
3 |
-export(ClassifyResult) |
|
4 |
-export(CrossValParams) |
|
5 |
-export(DLDApredictInterface) |
|
6 |
-export(DLDAtrainInterface) |
|
7 |
-export(DMDranking) |
|
8 |
-export(FeatureSetCollection) |
|
9 |
-export(GLMpredictInterface) |
|
10 |
-export(GLMtrainInterface) |
|
11 |
-export(KolmogorovSmirnovRanking) |
|
12 |
-export(KullbackLeiblerRanking) |
|
13 |
-export(ModellingParams) |
|
14 |
-export(NSCfeatures) |
|
15 |
-export(NSCpredictInterface) |
|
16 |
-export(NSCtrainInterface) |
|
17 |
-export(PredictParams) |
|
18 |
-export(ROCplot) |
|
19 |
-export(SVMpredictInterface) |
|
20 |
-export(SVMtrainInterface) |
|
21 |
-export(SelectParams) |
|
22 |
-export(TrainParams) |
|
23 |
-export(TransformParams) |
|
24 |
-export(actualOutcome) |
|
25 |
-export(available) |
|
26 |
-export(bartlettRanking) |
|
27 |
-export(calcCVperformance) |
|
28 |
-export(calcExternalPerformance) |
|
29 |
-export(chosenFeatureNames) |
|
30 |
-export(classifyInterface) |
|
31 |
-export(coxnetPredictInterface) |
|
32 |
-export(coxnetTrainInterface) |
|
33 |
-export(coxphPredictInterface) |
|
34 |
-export(coxphRanking) |
|
35 |
-export(coxphTrainInterface) |
|
36 |
-export(crossValidate) |
|
37 |
-export(differentMeansRanking) |
|
38 |
-export(distribution) |
|
39 |
-export(edgeRranking) |
|
40 |
-export(edgesToHubNetworks) |
|
41 |
-export(elasticNetFeatures) |
|
42 |
-export(elasticNetGLMpredictInterface) |
|
43 |
-export(elasticNetGLMtrainInterface) |
|
44 |
-export(featureSetSummary) |
|
45 |
-export(featuresInfo) |
|
46 |
-export(fisherDiscriminant) |
|
47 |
-export(forestFeatures) |
|
48 |
-export(generateCrossValParams) |
|
49 |
-export(generateModellingParams) |
|
50 |
-export(getLocationsAndScales) |
|
51 |
-export(interactorDifferences) |
|
52 |
-export(kNNinterface) |
|
53 |
-export(kTSPclassifier) |
|
54 |
-export(leveneRanking) |
|
55 |
-export(likelihoodRatioRanking) |
|
56 |
-export(limmaRanking) |
|
57 |
-export(mixModelsPredict) |
|
58 |
-export(mixModelsTrain) |
|
59 |
-export(models) |
|
60 |
-export(naiveBayesKernel) |
|
61 |
-export(pairsDifferencesRanking) |
|
62 |
-export(performance) |
|
63 |
-export(performancePlot) |
|
64 |
-export(plotFeatureClasses) |
|
65 |
-export(predictions) |
|
66 |
-export(previousSelection) |
|
67 |
-export(previousTrained) |
|
68 |
-export(randomForestPredictInterface) |
|
69 |
-export(randomForestTrainInterface) |
|
70 |
-export(rankingPlot) |
|
71 |
-export(rfsrcPredictInterface) |
|
72 |
-export(rfsrcTrainInterface) |
|
73 |
-export(runTest) |
|
74 |
-export(runTests) |
|
75 |
-export(sampleNames) |
|
76 |
-export(samplesMetricMap) |
|
77 |
-export(selectionPlot) |
|
78 |
-export(subtractFromLocation) |
|
79 |
-export(totalPredictions) |
|
80 |
-export(tunedParameters) |
|
81 |
-exportClasses(ClassifyResult) |
|
82 |
-exportClasses(CrossValParams) |
|
83 |
-exportClasses(FeatureSetCollection) |
|
84 |
-exportClasses(ModellingParams) |
|
85 |
-exportClasses(PredictParams) |
|
86 |
-exportClasses(SelectParams) |
|
87 |
-exportClasses(TrainParams) |
|
88 |
-exportClasses(TransformParams) |
|
89 |
-exportMethods("[") |
|
90 |
-exportMethods("[[") |
|
91 |
-exportMethods(ClassifyResult) |
|
92 |
-exportMethods(DLDApredictInterface) |
|
93 |
-exportMethods(DLDAtrainInterface) |
|
94 |
-exportMethods(DMDranking) |
|
95 |
-exportMethods(FeatureSetCollection) |
|
96 |
-exportMethods(GLMpredictInterface) |
|
97 |
-exportMethods(GLMtrainInterface) |
|
98 |
-exportMethods(KolmogorovSmirnovRanking) |
|
99 |
-exportMethods(KullbackLeiblerRanking) |
|
100 |
-exportMethods(NSCfeatures) |
|
101 |
-exportMethods(NSCpredictInterface) |
|
102 |
-exportMethods(NSCtrainInterface) |
|
103 |
-exportMethods(PredictParams) |
|
104 |
-exportMethods(ROCplot) |
|
105 |
-exportMethods(SVMpredictInterface) |
|
106 |
-exportMethods(SVMtrainInterface) |
|
107 |
-exportMethods(SelectParams) |
|
108 |
-exportMethods(TrainParams) |
|
109 |
-exportMethods(TransformParams) |
|
110 |
-exportMethods(actualOutcome) |
|
111 |
-exportMethods(bartlettRanking) |
|
112 |
-exportMethods(calcCVperformance) |
|
113 |
-exportMethods(calcExternalPerformance) |
|
114 |
-exportMethods(chosenFeatureNames) |
|
115 |
-exportMethods(classifyInterface) |
|
116 |
-exportMethods(coxnetPredictInterface) |
|
117 |
-exportMethods(coxnetTrainInterface) |
|
118 |
-exportMethods(coxphPredictInterface) |
|
119 |
-exportMethods(coxphRanking) |
|
120 |
-exportMethods(coxphTrainInterface) |
|
121 |
-exportMethods(crossValidate) |
|
122 |
-exportMethods(differentMeansRanking) |
|
123 |
-exportMethods(distribution) |
|
124 |
-exportMethods(edgeRranking) |
|
125 |
-exportMethods(elasticNetFeatures) |
|
126 |
-exportMethods(elasticNetGLMpredictInterface) |
|
127 |
-exportMethods(elasticNetGLMtrainInterface) |
|
128 |
-exportMethods(featureSetSummary) |
|
129 |
-exportMethods(featuresInfo) |
|
130 |
-exportMethods(fisherDiscriminant) |
|
131 |
-exportMethods(forestFeatures) |
|
132 |
-exportMethods(getLocationsAndScales) |
|
133 |
-exportMethods(interactorDifferences) |
|
134 |
-exportMethods(kNNinterface) |
|
135 |
-exportMethods(kTSPclassifier) |
|
136 |
-exportMethods(length) |
|
137 |
-exportMethods(leveneRanking) |
|
138 |
-exportMethods(likelihoodRatioRanking) |
|
139 |
-exportMethods(limmaRanking) |
|
140 |
-exportMethods(mixModelsPredict) |
|
141 |
-exportMethods(mixModelsTrain) |
|
142 |
-exportMethods(models) |
|
143 |
-exportMethods(naiveBayesKernel) |
|
144 |
-exportMethods(pairsDifferencesRanking) |
|
145 |
-exportMethods(performance) |
|
146 |
-exportMethods(performancePlot) |
|
147 |
-exportMethods(plotFeatureClasses) |
|
148 |
-exportMethods(predict) |
|
149 |
-exportMethods(predictions) |
|
150 |
-exportMethods(previousSelection) |
|
151 |
-exportMethods(previousTrained) |
|
152 |
-exportMethods(randomForestPredictInterface) |
|
153 |
-exportMethods(randomForestTrainInterface) |
|
154 |
-exportMethods(rankingPlot) |
|
155 |
-exportMethods(rfsrcPredictInterface) |
|
156 |
-exportMethods(rfsrcTrainInterface) |
|
157 |
-exportMethods(runTest) |
|
158 |
-exportMethods(runTests) |
|
159 |
-exportMethods(sampleNames) |
|
160 |
-exportMethods(samplesMetricMap) |
|
161 |
-exportMethods(selectionPlot) |
|
162 |
-exportMethods(show) |
|
163 |
-exportMethods(subtractFromLocation) |
|
164 |
-exportMethods(totalPredictions) |
|
165 |
-exportMethods(tunedParameters) |
|
166 |
-import(BiocParallel) |
|
167 |
-import(grid) |
|
168 |
-import(methods) |
|
169 |
-import(stats) |
|
170 |
-import(utils) |
|
171 |
-importFrom(MultiAssayExperiment,colData) |
|
172 |
-importFrom(MultiAssayExperiment,wideFormat) |
|
173 |
-importFrom(S4Vectors,as.data.frame) |
|
174 |
-importFrom(S4Vectors,do.call) |
|
175 |
-importFrom(S4Vectors,mcols) |
|
176 |
-importFrom(dplyr,mutate) |
|
177 |
-importFrom(dplyr,n) |
|
178 |
-importFrom(genefilter,rowFtests) |
|
179 |
-importFrom(genefilter,rowttests) |
|
180 |
-importFrom(randomForest,importance) |
|
181 |
-importFrom(randomForest,varUsed) |
|
182 |
-importFrom(rlang,sym) |
|
183 |
-importFrom(survival,Surv) |
|
184 |
-importFrom(survival,concordance) |
|
185 |
-importFrom(survival,coxph) |
|
186 |
-importFrom(tidyr,gather) |
|
187 |
-useDynLib(ClassifyR, .registration = TRUE) |
... | ... |
@@ -59,9 +59,7 @@ |
59 | 59 |
#' actual <- factor(c(rep("Healthy", 10), rep("Cancer", 10)), levels = c("Healthy", "Cancer")) |
60 | 60 |
#' result1 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"), |
61 | 61 |
#' value = c("Melanoma", "t-test", "Random Forest", "2-fold")), |
62 |
-#' LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
63 |
-#' `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE), |
|
64 |
-#' list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])), |
|
62 |
+#' LETTERS[1:20], list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])), |
|
65 | 63 |
#' list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])), |
66 | 64 |
#' list(function(oracle){}), NULL, predicted, actual) |
67 | 65 |
#' |
... | ... |
@@ -69,9 +67,7 @@ |
69 | 67 |
#' predicted[c(2, 6), "Cancer"] <- c(0.60, 0.40) |
70 | 68 |
#' result2 <- ClassifyResult(DataFrame(characteristic = c("Data Set", "Selection Name", "Classifier Name", "Cross-validation"), |
71 | 69 |
#' value = c("Example", "Bartlett Test", "Differential Variability", "2-fold")), |
72 |
-#' LETTERS[1:20], DataFrame(`Original Feature` = paste("Gene", LETTERS[1:10]), |
|
73 |
-#' `Renamed Feature` = paste("Feature", 1:10, sep = ''), check.names = FALSE), |
|
74 |
-#' list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])), |
|
70 |
+#' LETTERS[1:20], list(paste("Gene", LETTERS[1:10]), paste("Gene", LETTERS[c(5:1, 6:10)])), |
|
75 | 71 |
#' list(paste("Gene", LETTERS[1:3]), paste("Gene", LETTERS[1:5])), |
76 | 72 |
#' list(function(oracle){}), NULL, predicted, actual) |
77 | 73 |
#' ROCplot(list(result1, result2), plotTitle = "Cancer ROC") |
... | ... |
@@ -75,9 +75,7 @@ |
75 | 75 |
#' class = factor(sample(LETTERS[1:2], 50, replace = TRUE))) |
76 | 76 |
#' actual <- factor(sample(LETTERS[1:2], 10, replace = TRUE)) |
77 | 77 |
#' result <- ClassifyResult(DataFrame(characteristic = "Data Set", value = "Example"), |
78 |
-#' paste("A", 1:10, sep = ''), DataFrame(`Original Feature` = paste("Gene", 1:50), |
|
79 |
-#' `Renamed Feature` = paste("Feature", 1:50, sep = '')), |
|
80 |
-#' list(paste("Gene", 1:50), paste("Gene", 1:50)), list(paste("Gene", 1:5), paste("Gene", 1:10)), |
|
78 |
+#' paste("A", 1:10, sep = ''), list(paste("Gene", 1:50), paste("Gene", 1:50)), list(paste("Gene", 1:5), paste("Gene", 1:10)), |
|
81 | 79 |
#' list(function(oracle){}), NULL, predictTable, actual) |
82 | 80 |
#' result <- calcCVperformance(result) |
83 | 81 |
#' performance(result) |
... | ... |
@@ -462,7 +462,7 @@ setMethod("TransformParams", "function", |
462 | 462 |
{ |
463 | 463 |
if(ncol(characteristics) == 0 || !"Transform Name" %in% characteristics[, "characteristic"]) |
464 | 464 |
{ |
465 |
- characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Transform Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == transform@generic, "name"])) |
|
465 |
+ characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Transform Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(transform, "name"), "name"])) |
|
466 | 466 |
} |
467 | 467 |
new("TransformParams", transform = transform, characteristics = characteristics, |
468 | 468 |
intermediate = intermediate, otherParams = list(...)) |
... | ... |
@@ -829,11 +829,11 @@ setMethod("SelectParams", c("functionOrList"), |
829 | 829 |
{ |
830 | 830 |
if(!is.list(featureRanking) && (ncol(characteristics) == 0 || !"Selection Name" %in% characteristics[, "characteristic"])) |
831 | 831 |
{ |
832 |
- characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Selection Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == featureRanking@generic, "name"])) |
|
832 |
+ characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Selection Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(featureRanking, "name"), "name"])) |
|
833 | 833 |
} |
834 | 834 |
if(is.list(featureRanking) && (ncol(characteristics) == 0 || !"Ensemble Selection" %in% characteristics[, "characteristic"])) |
835 | 835 |
{ |
836 |
- selectMethodNames <- unlist(lapply(featureRanking, function(rankingFunction) .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == rankingFunction@generic, "name"])) |
|
836 |
+ selectMethodNames <- unlist(lapply(featureRanking, function(rankingFunction) .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(rankingFunction, "name"), "name"])) |
|
837 | 837 |
characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Ensemble Selection", value = paste(selectMethodNames, collapse = ", "))) |
838 | 838 |
} |
839 | 839 |
others <- list(...) |
... | ... |
@@ -966,7 +966,7 @@ setMethod("TrainParams", c("function"), |
966 | 966 |
{ |
967 | 967 |
if(ncol(characteristics) == 0 || !"Classifier Name" %in% characteristics[, "characteristic"]) |
968 | 968 |
{ |
969 |
- characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Classifier Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == classifier@generic, "name"])) |
|
969 |
+ characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Classifier Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == attr(classifier, "name"), "name"])) |
|
970 | 970 |
} |
971 | 971 |
new("TrainParams", classifier = classifier, characteristics = characteristics, |
972 | 972 |
intermediate = intermediate, getFeatures = getFeatures, tuneParams = tuneParams, |
... | ... |
@@ -991,15 +991,8 @@ setMethod("show", "TrainParams", |
991 | 991 |
cat(otherInfo[rowIndex, "characteristic"], ": ", otherInfo[rowIndex, "value"], ".\n", sep = '') |
992 | 992 |
} |
993 | 993 |
} |
994 |
- |
|
995 |
- if(!is.null(object@getFeatures)) |
|
996 |
- cat("Selected Features Extracted By: ", object@getFeatures@generic, ".\n", sep = '') |
|
997 | 994 |
}) |
998 | 995 |
|
999 |
- |
|
1000 |
- |
|
1001 |
- |
|
1002 |
- |
|
1003 | 996 |
##### PredictParams ##### |
1004 | 997 |
|
1005 | 998 |
#' @exportClass PredictParams |
... | ... |
@@ -1082,10 +1075,6 @@ setMethod("PredictParams", c("functionOrNULL"), |
1082 | 1075 |
{ |
1083 | 1076 |
if(missing(predictor)) |
1084 | 1077 |
stop("Either a function or NULL must be specified by 'predictor'.") |
1085 |
- if(!is.null(predictor) && (ncol(characteristics) == 0 || !"Predictor Name" %in% characteristics[, "characteristic"])) |
|
1086 |
- { |
|
1087 |
- characteristics <- rbind(characteristics, S4Vectors::DataFrame(characteristic = "Predictor Name", value = .ClassifyRenvir[["functionsTable"]][.ClassifyRenvir[["functionsTable"]][, "character"] == predictor@generic, "name"])) |
|
1088 |
- } |
|
1089 | 1078 |
others <- list(...) |
1090 | 1079 |
if(length(others) == 0) others <- NULL |
1091 | 1080 |
new("PredictParams", predictor = predictor, characteristics = characteristics, |
... | ... |
@@ -1227,7 +1216,6 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
1227 | 1216 |
#' @aliases ClassifyResult ClassifyResult-class |
1228 | 1217 |
#' ClassifyResult,DataFrame,character,characterOrDataFrame-method |
1229 | 1218 |
#' show,ClassifyResult-method sampleNames sampleNames,ClassifyResult-method |
1230 |
-#' featuresInfo featuresInfo,ClassifyResult-method |
|
1231 | 1219 |
#' predictions predictions,ClassifyResult-method actualOutcome |
1232 | 1220 |
#' actualOutcome,ClassifyResult-method features features,ClassifyResult-method |
1233 | 1221 |
#' models models,ClassifyResult-method performance |
... | ... |
@@ -1247,10 +1235,9 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
1247 | 1235 |
#' package, the function names will automatically be generated and therefore it |
1248 | 1236 |
#' is not necessary to specify them.} |
1249 | 1237 |
#' \item{\code{originalNames}}{All sample names.} |
1250 |
-#' \item{\code{featuresInfo}}{A \code{\link{DataFrame}} containing all feature names in original format |
|
1251 |
-#' and a safe format without any unusual symbols that R would automatically convert into another format and cause trouble.} |
|
1252 |
-#' \item{\code{rankedFeatures}}{All features, from most to least important. Character vector |
|
1253 |
-#' or a data frame if data set has multiple kinds of measurements on the same set of samples.} |
|
1238 |
+#' \item{\code{originalFeatures}}{All feature names. Character vector |
|
1239 |
+#' or \code{\link{DataFrame}} with one row for each feature if the data set has multiple kinds |
|
1240 |
+#' of measurements on the same set of samples.} |
|
1254 | 1241 |
#' \item{\code{chosenFeatures}}{Features selected at each fold. Character |
1255 | 1242 |
#' vector or a data frame if data set has multiple kinds of measurements on the same set of samples.} |
1256 | 1243 |
#' \item{\code{models}}{All of the models fitted to the training data.} |
... | ... |
@@ -1276,8 +1263,6 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
1276 | 1263 |
#' \describe{ |
1277 | 1264 |
#' \item{\code{sampleNames(result)}}{Returns a vector of sample names present in the data set.}} |
1278 | 1265 |
#' \describe{ |
1279 |
-#' \item{\code{featuresInfo(result)}}{Returns a table of features present in the data set. Shows original names and renamed names to ensure no unusual symbols in names.}} |
|
1280 |
-#' \describe{ |
|
1281 | 1266 |
#' \item{\code{actualOutcome(result)}}{Returns the known outcome of each sample.}} |
1282 | 1267 |
#' \describe{ |
1283 | 1268 |
#' \item{\code{models(result)}}{A \code{list} of the models fitted for each training.}} |
... | ... |
@@ -1317,7 +1302,7 @@ setClassUnion("ModellingParamsOrNULL", c("ModellingParams", "NULL")) |
1317 | 1302 |
#' @importFrom S4Vectors as.data.frame |
1318 | 1303 |
#' @usage NULL |
1319 | 1304 |
#' @export |
1320 |
-setGeneric("ClassifyResult", function(characteristics, originalNames, featuresInfo, ...) |
|
1305 |
+setGeneric("ClassifyResult", function(characteristics, originalNames, ...) |
|
1321 | 1306 |
standardGeneric("ClassifyResult")) |
1322 | 1307 |
|
1323 | 1308 |
#' @rdname ClassifyResult-class |
... | ... |
@@ -1325,7 +1310,7 @@ standardGeneric("ClassifyResult")) |
1325 | 1310 |
setClass("ClassifyResult", representation( |
1326 | 1311 |
characteristics = "DataFrame", |
1327 | 1312 |
originalNames = "character", |
1328 |
- featuresInfo = "DataFrame", |
|
1313 |
+ originalFeatures = "characterOrDataFrame", |
|
1329 | 1314 |
rankedFeatures = "listOrNULL", |
1330 | 1315 |
chosenFeatures = "listOrNULL", |
1331 | 1316 |
actualOutcome = "factorOrSurv", |
... | ... |
@@ -1340,12 +1325,11 @@ setClass("ClassifyResult", representation( |
1340 | 1325 |
#' @rdname ClassifyResult-class |
1341 | 1326 |
#' @usage NULL |
1342 | 1327 |
#' @export |
1343 |
-setMethod("ClassifyResult", c("DataFrame", "character", "characterOrDataFrame"), |
|
1344 |
- function(characteristics, originalNames, featuresInfo, |
|
1328 |
+setMethod("ClassifyResult", c("DataFrame", "character"), |
|
1329 |
+ function(characteristics, originalNames, originalFeatures, |
|
1345 | 1330 |
rankedFeatures, chosenFeatures, models, tunedParameters, predictions, actualOutcome, importance = NULL, modellingParams = NULL, finalModel = NULL) |
1346 | 1331 |
{ |
1347 |
- new("ClassifyResult", characteristics = characteristics, |
|
1348 |
- originalNames = originalNames, featuresInfo = featuresInfo, |
|
1332 |
+ new("ClassifyResult", characteristics = characteristics, originalNames = originalNames, originalFeatures = originalFeatures, |
|
1349 | 1333 |
rankedFeatures = rankedFeatures, chosenFeatures = chosenFeatures, |
1350 | 1334 |
models = models, tune = tunedParameters, |
1351 | 1335 |
predictions = predictions, actualOutcome = actualOutcome, importance = importance, modellingParams = modellingParams, finalModel = finalModel) |
... | ... |
@@ -1383,7 +1367,7 @@ standardGeneric("sampleNames")) |
1383 | 1367 |
#' @rdname ClassifyResult-class |
1384 | 1368 |
#' @usage NULL |
1385 | 1369 |
#' @export |
1386 |
-setMethod("sampleNames", c("ClassifyResult"), |
|
1370 |
+setMethod("sampleNames", "ClassifyResult", |
|
1387 | 1371 |
function(object) |
1388 | 1372 |
{ |
1389 | 1373 |
object@originalNames |
... | ... |
@@ -1392,16 +1376,16 @@ setMethod("sampleNames", c("ClassifyResult"), |
1392 | 1376 |
#' @rdname ClassifyResult-class |
1393 | 1377 |
#' @usage NULL |
1394 | 1378 |
#' @export |
1395 |
-setGeneric("featuresInfo", function(object, ...) |
|
1396 |
-standardGeneric("featuresInfo")) |
|
1379 |
+setGeneric("allFeatureNames", function(object, ...) |
|
1380 |
+standardGeneric("allFeatureNames")) |
|
1397 | 1381 |
|
1398 | 1382 |
#' @rdname ClassifyResult-class |
1399 | 1383 |
#' @usage NULL |
1400 | 1384 |
#' @export |
1401 |
-setMethod("featuresInfo", c("ClassifyResult"), |
|
1385 |
+setMethod("allFeatureNames", c("ClassifyResult"), |
|
1402 | 1386 |
function(object) |
1403 | 1387 |
{ |
1404 |
- object@featuresInfo |
|
1388 |
+ object@originalFeatures |
|
1405 | 1389 |
}) |
1406 | 1390 |
|
1407 | 1391 |
#' @rdname ClassifyResult-class |
... | ... |
@@ -1413,7 +1397,7 @@ standardGeneric("chosenFeatureNames")) |
1413 | 1397 |
#' @rdname ClassifyResult-class |
1414 | 1398 |
#' @usage NULL |
1415 | 1399 |
#' @export |
1416 |
-setMethod("chosenFeatureNames", c("ClassifyResult"), |
|
1400 |
+setMethod("chosenFeatureNames", "ClassifyResult", |
|
1417 | 1401 |
function(object) |
1418 | 1402 |
{ |
1419 | 1403 |
object@chosenFeatures |
... | ... |
@@ -1427,7 +1411,7 @@ standardGeneric("models")) |
1427 | 1411 |
#' @rdname ClassifyResult-class |
1428 | 1412 |
#' @usage NULL |
1429 | 1413 |
#' @export |
1430 |
-setMethod("models", c("ClassifyResult"), |
|
1414 |
+setMethod("models", "ClassifyResult", |
|
1431 | 1415 |
function(object) |
1432 | 1416 |
{ |
1433 | 1417 |
object@models |
... | ... |
@@ -1441,7 +1425,7 @@ standardGeneric("predictions")) |
1441 | 1425 |
#' @rdname ClassifyResult-class |
1442 | 1426 |
#' @usage NULL |
1443 | 1427 |
#' @export |
1444 |
-setMethod("predictions", c("ClassifyResult"), |
|
1428 |
+setMethod("predictions", "ClassifyResult", |
|
1445 | 1429 |
function(object) |
1446 | 1430 |
{ |
1447 | 1431 |
object@predictions |
... | ... |
@@ -1455,7 +1439,7 @@ standardGeneric("performance")) |
1455 | 1439 |
#' @rdname ClassifyResult-class |
1456 | 1440 |
#' @usage NULL |
1457 | 1441 |
#' @export |
1458 |
-setMethod("performance", c("ClassifyResult"), |
|
1442 |
+setMethod("performance", "ClassifyResult", |
|
1459 | 1443 |
function(object) |
1460 | 1444 |
{ |
1461 | 1445 |
object@performance |
... | ... |
@@ -1469,7 +1453,7 @@ standardGeneric("actualOutcome")) |
1469 | 1453 |
#' @rdname ClassifyResult-class |
1470 | 1454 |
#' @usage NULL |
1471 | 1455 |
#' @export |
1472 |
-setMethod("actualOutcome", c("ClassifyResult"), |
|
1456 |
+setMethod("actualOutcome", "ClassifyResult", |
|
1473 | 1457 |
function(object) |
1474 | 1458 |
{ |
1475 | 1459 |
object@actualOutcome |
... | ... |
@@ -1496,7 +1480,7 @@ standardGeneric("totalPredictions")) |
1496 | 1480 |
#' @rdname ClassifyResult-class |
1497 | 1481 |
#' @usage NULL |
1498 | 1482 |
#' @export |
1499 |
-setMethod("totalPredictions", c("ClassifyResult"), |
|
1483 |
+setMethod("totalPredictions", "ClassifyResult", |
|
1500 | 1484 |
function(result) |
1501 | 1485 |
{ |
1502 | 1486 |
nrow(predictions(result)) |
... | ... |
@@ -24,13 +24,10 @@ |
24 | 24 |
"classifyInterface", "Poisson LDA", |
25 | 25 |
"differentMeansRanking", "Difference in Means", |
26 | 26 |
"DLDAtrainInterface", "Diagonal LDA", |
27 |
- "DLDApredictInterface", "Diagonal LDA", |
|
28 | 27 |
"DMDranking", "Differences of Medians and Deviations", |
29 | 28 |
"edgeRranking", "edgeR LRT", |
30 | 29 |
"GLMtrainInterface", "Logistic Regression", |
31 |
- "GLMpredictInterface", "Logistic Regression", |
|
32 | 30 |
"elasticNetGLMtrainInterface", "Elastic Net GLM", |
33 |
- "elasticNetGLMpredictInterface", "Elastic Net GLM", |
|
34 | 31 |
"fisherDiscriminant", "Fisher's LDA", |
35 | 32 |
"kNNinterface", "k Nearest Neighbours", |
36 | 33 |
"KolmogorovSmirnovRanking", "Kolmogorov-Smirnov Test", |
... | ... |
@@ -40,27 +37,19 @@ |
40 | 37 |
"likelihoodRatioRanking", "Likelihood Ratio Test (Normal)", |
41 | 38 |
"limmaRanking", "Moderated t-test", |
42 | 39 |
"mixModelsTrain", "Mixtures of Normals", |
43 |
- "mixModelsPredict", "Mixtures of Normals", |
|
44 | 40 |
"naiveBayesKernel", "Naive Bayes Kernel", |
45 | 41 |
"NSCtrainInterface", "Nearest Shrunken Centroids", |
46 |
- "NSCpredictInterface", "Nearest Shrunken Centroids", |
|
47 | 42 |
"pairsDifferencesRanking", "Pairs Differences", |
48 | 43 |
"previousSelection", "Previous Selection", |
49 | 44 |
"previousTrained", "Previous Trained", |
50 | 45 |
"randomForestTrainInterface", "Random Forest", |
51 |
- "randomForestPredictInterface", "Random Forest", |
|
52 | 46 |
"subtractFromLocation", "Location Subtraction", |
53 | 47 |
"SVMtrainInterface", "Support Vector Machine", |
54 |
- "SVMpredictInterface", "Support Vector Machine", |
|
55 | 48 |
"coxphTrainInterface", "Cox Proportional Hazards", |
56 |
- "coxphPredictInterface", "Cox Proportional Hazards", |
|
57 | 49 |
"coxphRanking", "Cox Proportional Hazards", |
58 | 50 |
"coxnetTrainInterface", "Penalised Cox Proportional Hazards", |
59 |
- "coxnetPredictInterface", "Penalised Cox Proportional Hazards", |
|
60 | 51 |
#"NEMOEtrainInterface", "Nutrition-Ecotype Mixture of Experts", |
61 |
- #"NEMOEpredictInterface", "Nutrition-Ecotype Mixture of Experts", |
|
62 |
- "rfsrcTrainInterface", "Random Survival Forest", |
|
63 |
- "rfsrcPredictInterface", "Random Survival Forest"), |
|
52 |
+ "rfsrcTrainInterface", "Random Survival Forest"), |
|
64 | 53 |
ncol = 2, byrow = TRUE, dimnames = list(NULL, c("character", "name")) |
65 | 54 |
) |> as.data.frame() |
66 | 55 |
|
... | ... |
@@ -13,7 +13,7 @@ |
13 | 13 |
#' column name in \code{colData(measurements)} if \code{measurements} is a \code{\link{MultiAssayExperiment}}. If a column name, that column will be |
14 | 14 |
#' removed before training. Or a \code{\link{Surv}} object or a character vector of length 2 or 3 specifying the time and event columns in |
15 | 15 |
#' \code{measurements} for survival outcome. |
16 |
-#' @param ... Arguments other than measurements and outcome in the generic. |
|
16 |
+#' @param ... Parameters passed into \code{\link{prepareData}}. |
|
17 | 17 |
#' @param nFeatures The number of features to be used for classification. If this is a single number, the same number of features will be used for all comparisons |
18 | 18 |
#' or assays. If a numeric vector these will be optimised over using \code{selectionOptimisation}. If a named vector with the same names of multiple assays, |
19 | 19 |
#' a different number of features will be used for each assay. If a named list of vectors, the respective number of features will be optimised over. |
... | ... |
@@ -95,18 +95,21 @@ setMethod("crossValidate", "DataFrame", |
95 | 95 |
nFolds = 5, |
96 | 96 |
nRepeats = 20, |
97 | 97 |
nCores = 1, |
98 |
- characteristicsLabel = NULL) |
|
98 |
+ characteristicsLabel = NULL, ...) |
|
99 | 99 |
|
100 | 100 |
{ |
101 |
- # Check that data is in the right format |
|
102 |
- splitAssay <- .splitDataAndOutcome(measurements, outcome) |
|
103 |
- measurements <- splitAssay[["measurements"]] |
|
104 |
- outcome <- splitAssay[["outcome"]] |
|
101 |
+ # Check that data is in the right format, if not already done for MultiAssayExperiment input. |
|
102 |
+ if(is.null(mcols(measurements)$assay)) |
|
103 |
+ { |
|
104 |
+ splitAssay <- prepareData(measurements, outcome, ...) |
|
105 |
+ measurements <- splitAssay[["measurements"]] |
|
106 |
+ outcome <- splitAssay[["outcome"]] |
|
107 |
+ } |
|
105 | 108 |
|
106 | 109 |
# Which data-types or data-views are present? |
107 | 110 |
assayIDs <- unique(mcols(measurements)[, "assay"]) |
108 |
- if(is.null(assayIDs)) |
|
109 |
- assayIDs <- 1 |
|
111 |
+ if(!is.null(characteristicsLabel)) assayIDs <- characteristicsLabel |
|
112 |
+ if(is.null(assayIDs)) assayIDs <- 1 |
|
110 | 113 |
|
111 | 114 |
checkData(measurements, outcome) |
112 | 115 |
|
... | ... |
@@ -303,8 +306,8 @@ setMethod("crossValidate", "MultiAssayExperiment", |
303 | 306 |
nCores = 1, |
304 | 307 |
characteristicsLabel = NULL) |
305 | 308 |
{ |
306 |
- targets <- c(names(measurements), "sampleInfo") |
|
307 |
- omicsTargets <- setdiff(targets, "sampleInfo") |
|
309 |
+ targets <- c(names(measurements), "clinical") |
|
310 |
+ omicsTargets <- setdiff(targets, "clinical") |
|
308 | 311 |
if(length(omicsTargets) > 0) |
309 | 312 |
{ |
310 | 313 |
if(any(anyReplicated(measurements[, , omicsTargets]))) |
... | ... |
@@ -103,7 +103,7 @@ setMethod("distribution", "ClassifyResult", |
103 | 103 |
allFeaturesText <- allFeatures |
104 | 104 |
} else if(is(chosenFeatures[[1]], "DataFrame")) { |
105 | 105 |
allFeatures <- do.call(rbind, chosenFeatures) |
106 |
- allFeaturesText <- paste(allFeatures[, "Original Assay"], allFeatures[, "Original Feature"], sep = ':') |
|
106 |
+ allFeaturesText <- paste(allFeatures[, "assay"], allFeatures[, "feature"], sep = ':') |
|
107 | 107 |
} else if("Pairs" %in% class(chosenFeatures[[1]])) { |
108 | 108 |
allFeatures <- do.call(c, unname(chosenFeatures)) |
109 | 109 |
allFeaturesText <- paste(first(allFeatures), second(allFeatures), sep = ', ') |
... | ... |
@@ -14,8 +14,8 @@ |
14 | 14 |
#' If of type \code{\link{DataFrame}} or \code{\link{MultiAssayExperiment}}, the data set is subset |
15 | 15 |
#' to only those features of type \code{numeric}. |
16 | 16 |
#' @param targets If \code{measurements} is a \code{MultiAssayExperiment}, the |
17 |
-#' names of the data tables to be used. \code{"sampleInfo"} is also a valid value |
|
18 |
-#' and specifies that numeric variables from the sample information data table will be |
|
17 |
+#' names of the data tables to be used. \code{"clinical"} is also a valid value |
|
18 |
+#' and specifies that numeric variables from the clinical data table will be |
|
19 | 19 |
#' used. |
20 | 20 |
#' @param ... Variables not used by the \code{matrix} nor the |
21 | 21 |
#' \code{MultiAssayExperiment} method which are passed into and used by the |
... | ... |
@@ -74,17 +74,17 @@ setMethod("getLocationsAndScales", "DataFrame", # Sample information data or one |
74 | 74 |
c(location, scale)) |
75 | 75 |
}) |
76 | 76 |
|
77 |
-# One or more omics data sets, possibly with sample information data. |
|
77 |
+# One or more omics data sets, possibly with clinical data. |
|
78 | 78 |
#' @rdname getLocationsAndScales |
79 | 79 |
#' @export |
80 | 80 |
setMethod("getLocationsAndScales", "MultiAssayExperiment", |
81 | 81 |
function(measurements, targets = names(measurements), ...) |
82 | 82 |
{ |
83 |
- if(!all(targets %in% c(names(measurements), "sampleInfo"))) |
|
84 |
- stop("Some table names in 'targets' are not assay names in 'measurements' or \"sampleInfo\".") |
|
83 |
+ if(!all(targets %in% c(names(measurements), "clinical"))) |
|
84 |
+ stop("Some table names in 'targets' are not assay names in 'measurements' or \"clinical\".") |
|
85 | 85 |
|
86 | 86 |
combinedData <- .MAEtoWideTable(measurements, targets, NULL) |
87 | 87 |
if(class(combinedData) == "list") |
88 | 88 |
combinedData <- combinedData[["dataTable"]] |
89 | 89 |
getLocationsAndScales(combinedData, ...) |
90 |
-}) |
|
91 | 90 |
\ No newline at end of file |
91 |
+}) |
... | ... |
@@ -1,94 +1,12 @@ |
1 |
-#' An Interface for PoiClaClu Package's Classify Function |
|
2 |
-#' |
|
3 |
-#' More details of Poisson LDA are available in the documentation of |
|
4 |
-#' \code{\link[PoiClaClu]{Classify}}. Data tables which consist entirely of |
|
5 |
-#' non-integer data cannot be analysed. |
|
6 |
-#' |
|
7 |
-#' @aliases classifyInterface classifyInterface,matrix-method |
|
8 |
-#' classifyInterface,DataFrame-method |
|
9 |
-#' classifyInterface,MultiAssayExperiment-method |
|
10 |
-#' @param countsTrain Either a \code{\link{matrix}}, \code{\link{DataFrame}} |
|
11 |
-#' or \code{\link{MultiAssayExperiment}} containing the training data. For a |
|
12 |
-#' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns are features. |
|
13 |
-#' If of type \code{\link{DataFrame}} or \code{\link{MultiAssayExperiment}}, the data set is subset |
|
14 |
-#' to only those features of type \code{numeric}. |
|
15 |
-#' @param classesTrain A vector of class labels of class \code{\link{factor}} of the |
|
16 |
-#' same length as the number of samples in \code{countsTrain} if it is a |
|
17 |
-#' \code{\link{matrix}} or a \code{\link{DataFrame}} or a character vector of length 1 |
|
18 |
-#' containing the column name in \code{countsTrain} if it is a \code{\link{DataFrame}} or the |
|
19 |
-#' column name in \code{colData(countsTrain)} if \code{countsTrain} is a |
|
20 |
-#' \code{\link{MultiAssayExperiment}}. If a column name, that column will be |
|
21 |
-#' removed before training. |
|
22 |
-#' @param countsTest An object of the same class as \code{countsTrain} with no |
|
23 |
-#' samples in common with \code{countsTrain} and the same number of features |
|
24 |
-#' as it. |
|
25 |
-#' @param targets If \code{countsTrain} is a \code{MultiAssayExperiment}, the |
|
26 |
-#' names of the data tables to be used. \code{"sampleInfo"} is also a valid value |
|
27 |
-#' and specifies that integer variables from the sample information table will be |
|
28 |
-#' used. |
|
29 |
-#' @param ... Variables not used by the \code{matrix} nor the |
|
30 |
-#' \code{MultiAssayExperiment} method which are passed into and used by the |
|
31 |
-#' \code{DataFrame} method or parameters that \code{\link[PoiClaClu]{Classify}} |
|
32 |
-#' can accept. |
|
33 |
-#' @param returnType Default: \code{"both"}. Either \code{"class"}, |
|
34 |
-#' \code{"score"} or \code{"both"}. Sets the return value from the prediction |
|
35 |
-#' to either a vector of class labels, matrix of scores for each class, or both |
|
36 |
-#' labels and scores in a \code{data.frame}. |
|
37 |
-#' @param verbose Default: 3. A number between 0 and 3 for the amount of |
|
38 |
-#' progress messages to give. This function only prints progress messages if |
|
39 |
-#' the value is 3. |
|
40 |
-#' @return Either a factor vector of predicted classes, a matrix of scores for |
|
41 |
-#' each class, or a table of both the class labels and class scores, depending |
|
42 |
-#' on the setting of \code{returnType}. |
|
43 |
-#' @author Dario Strbenac |
|
44 |
-#' @examples |
|
45 |
-#' |
|
46 |
-#' if(require(PoiClaClu)) |
|
47 |
-#' { |
|
48 |
-#' readCounts <- CountDataSet(n = 100, p = 1000, 2, 5, 0.1) |
|
49 |
-#' # Rows are for features, columns are for samples. |
|
50 |
-#' trainData <- readCounts[['x']] |
|
51 |
-#' trainClasses <- factor(paste("Class", readCounts[['y']])) |
|
52 |
-#' testData <- readCounts[['xte']] |
|
53 |
-#' storage.mode(trainData) <- storage.mode(testData) <- "integer" |
|
54 |
-#' classified <- classifyInterface(trainData, trainClasses, testData) |
|
55 |
-#' |
|
56 |
-#' setNames(table(paste("Class", readCounts[["yte"]]) == classified), c("Incorrect", "Correct")) |
|
57 |
-#' } |
|
58 |
-#' |
|
59 |
-#' @usage NULL |
|
60 |
-#' @export |
|
61 |
-setGeneric("classifyInterface", function(countsTrain, ...) |
|
62 |
-standardGeneric("classifyInterface")) |
|
1 |
+# An Interface for PoiClaClu Package's Classify Function. Poisson LDA for counts. |
|
63 | 2 |
|
64 |
-#' @rdname classifyInterface |
|
65 |
-#' @export |
|
66 |
-setMethod("classifyInterface", "matrix", # Matrix of integer measurements. |
|
67 |
- function(countsTrain, classesTrain, countsTest, ...) |
|
68 |
-{ |
|
69 |
- classifyInterface(S4Vectors::DataFrame(countsTrain, check.names = FALSE), |
|
70 |
- classesTrain, |
|
71 |
- S4Vectors::DataFrame(countsTest, check.names = FALSE), ...) |
|
72 |
-}) |
|
73 |
- |
|
74 |
-# Sample information data or one of the other inputs, transformed. |
|
75 |
-#' @rdname classifyInterface |
|
76 |
-#' @export |
|
77 |
-setMethod("classifyInterface", "DataFrame", function(countsTrain, classesTrain, countsTest, ..., |
|
78 |
- returnType = c("both", "class", "score"), verbose = 3) |
|
3 |
+classifyInterface <- function(countsTrain, classesTrain, countsTest, ..., |
|
4 |
+ returnType = c("both", "class", "score"), verbose = 3) |
|
79 | 5 |
{ |
80 | 6 |
if(!requireNamespace("PoiClaClu", quietly = TRUE)) |
81 | 7 |
stop("The package 'PoiClaClu' could not be found. Please install it.") |
82 | 8 |
returnType <- match.arg(returnType) |
83 | 9 |
|
84 |
- # Ensure that any non-integer variables are removed from the training and testing matrices. |
|
85 |
- splitDataset <- .splitDataAndOutcome(countsTrain, classesTrain, restrict = "integer") |
|
86 |
- classesTrain <- splitDataset[["outcome"]] |
|
87 |
- trainingMatrix <- as.matrix(splitDataset[["measurements"]]) |
|
88 |
- isInteger <- sapply(countsTest, is.integer) |
|
89 |
- testingMatrix <- as.matrix(countsTest[, isInteger, drop = FALSE]) |
|
90 |
- .checkVariablesAndSame(trainingMatrix, testingMatrix) |
|
91 |
- |
|
92 | 10 |
if(verbose == 3) |
93 | 11 |
message("Fitting Poisson LDA classifier to training data and making predictions on test data.") |
94 | 12 |
|
... | ... |
@@ -99,18 +17,5 @@ setMethod("classifyInterface", "DataFrame", function(countsTrain, classesTrain, |
99 | 17 |
switch(returnType, class = classPredictions, # Factor vector. |
100 | 18 |
score = classScores, # Numeric matrix. |
101 | 19 |
both = data.frame(class = classPredictions, classScores, check.names = FALSE)) |
102 |
-}) |
|
103 |
- |
|
104 |
-#' @rdname classifyInterface |
|
105 |
-#' @export |
|
106 |
-setMethod("classifyInterface", "MultiAssayExperiment", |
|
107 |
-function(countsTrain, countsTest, targets = names(countsTrain), classesTrain, ...) |
|
108 |
-{ |
|
109 |
- tablesAndOutcome <- .MAEtoWideTable(countsTrain, targets, classesTrain, "integer") |
|
110 |
- trainingMatrix <- tablesAndOutcome[["dataTable"]] |
|
111 |
- classesTrain <- tablesAndOutcome[["outcome"]] |
|
112 |
- testingMatrix <- .MAEtoWideTable(countsTest, targets, "integer") |
|
113 |
- |
|
114 |
- .checkVariablesAndSame(trainingMatrix, testingMatrix) |
|
115 |
- classifyInterface(trainingMatrix, classesTrain, testingMatrix, ...) |
|
116 |
-}) |
|
117 | 20 |
\ No newline at end of file |
21 |
+} |
|
22 |
+attr(classifyInterface, "name") <- "classifyInterface" |
|
118 | 23 |
\ No newline at end of file |
... | ... |
@@ -1,151 +1,18 @@ |
1 |
-################################################################################ |
|
2 |
-# |
|
3 |
-# Train interface |
|
4 |
-# |
|
5 |
-################################################################################ |
|
6 |
- |
|
7 |
- |
|
8 |
-#' An Interface for survival Package's coxph Function |
|
9 |
-#' |
|
10 |
-#' Cox proportional hazards. |
|
11 |
-#' |
|
12 |
-#' @aliases coxphInterface coxphTrainInterface coxphPredictInterface |
|
13 |
-#' coxphInterface,matrix-method |
|
14 |
-#' coxphInterface,DataFrame-method |
|
15 |
-#' coxphInterface,MultiAssayExperiment-method |
|
16 |
-#' coxphPredictInterface |
|
17 |
-#' coxphPredictInterface,coxph,matrix-method |
|
18 |
-#' coxphPredictInterface,coxph,DataFrame-method |
|
19 |
-#' coxphPredictInterface,coxph,MultiAssayExperiment-method |
|
20 |
-#' @param measurementsTrain Either a \code{\link{matrix}}, \code{\link{DataFrame}} |
|
21 |
-#' or \code{\link{MultiAssayExperiment}} containing the training data. For a |
|
22 |
-#' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns are features. |
|
23 |
-#' @param survivalTrain A tabular data type of survival information of the |
|
24 |
-#' same number of rows as the number of samples in \code{measurementsTrain} and 2 to 3 columns if it is a |
|
25 |
-#' \code{\link{matrix}} or a \code{\link{DataFrame}}, or a character vector of length 2 to 3 containing the |
|
26 |
-#' column names in \code{measurementsTrain} if it is a \code{\link{DataFrame}} or the |
|
27 |
-#' column name in \code{colData(measurementsTrain)} if \code{measurementsTrain} is a |
|
28 |
-#' \code{\link{MultiAssayExperiment}}. If a vector of column names, those columns will be |
|
29 |
-#' removed before training. |
|
30 |
-#' @param model A trained coxph classifier, as created by |
|
31 |
-#' \code{coxphTrainInterface}, which has the same form as the output of |
|
32 |
-#' \code{\link[survival]{coxph}}. |
|
33 |
-#' @param measurementsTest An object of the same class as \code{measurementsTrain} with no |
|
34 |
-#' samples in common with \code{measurementsTrain} and the same number of features |
|
35 |
-#' as it. |
|
36 |
-#' @param targets If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the |
|
37 |
-#' names of the data tables to be used. \code{"sampleInfo"} is also a valid value |
|
38 |
-#' and specifies that integer variables from the clinical data table will be |
|
39 |
-#' used. |
|
40 |
-#' @param ... Variables not used by the \code{matrix} nor the |
|
41 |
-#' \code{MultiAssayExperiment} method which are passed into and used by the |
|
42 |
-#' \code{DataFrame} method (e.g. \code{verbose}) or options which are accepted |
|
43 |
-#' by the \code{\link[survival]{coxph}} or \code{\link[survival]{predict.coxph}} functions. |
|
44 |
-#' @param verbose Default: 3. A number between 0 and 3 for the amount of |
|
45 |
-#' progress messages to give. This function only prints progress messages if |
|
46 |
-#' the value is 3. |
|
47 |
-#' @return For \code{coxphTrainInterface}, the trained Cox proportional hazards model. |
|
48 |
-#' For \code{coxphPredictInterface}, a risk score prediction for each sample. |
|
49 |
-#' @examples |
|
50 |
-#' #' |
|
51 |
-#' # if(require(randomForest)) |
|
52 |
-#' # { |
|
53 |
-#' # # Genes 76 to 100 have differential expression. |
|
54 |
-#' # genesMatrix <- sapply(1:25, function(sample) c(rnorm(100, 9, 2))) |
|
55 |
-#' # genesMatrix <- cbind(genesMatrix, sapply(1:25, function(sample) |
|
56 |
-#' # c(rnorm(75, 9, 2), rnorm(25, 14, 2)))) |
|
57 |
-#' # classes <- factor(rep(c("Poor", "Good"), each = 25)) |
|
58 |
-#' # colnames(genesMatrix) <- paste("Sample", 1:ncol(genesMatrix), sep = ' ') |
|
59 |
-#' # rownames(genesMatrix) <- paste("Gene", 1:nrow(genesMatrix), sep = '-') |
|
60 |
-#' # trainingSamples <- c(1:20, 26:45) |
|
61 |
-#' # testingSamples <- c(21:25, 46:50) |
|
62 |
-#' # |
|
63 |
-#' # trained <- randomForestTrainInterface(genesMatrix[, trainingSamples], |
|
64 |
-#' # classes[trainingSamples]) |
|
65 |
-#' # predicted <- randomForestPredictInterface(trained, genesMatrix[, testingSamples]) |
|
66 |
-#' # } |
|
67 |
-#' |
|
68 |
-#' @importFrom survival coxph concordance |
|
69 |
-#' @rdname coxphInterface |
|
70 |
-#' @usage NULL |
|
71 |
-#' @export |
|
72 |
-setGeneric("coxphTrainInterface", function(measurementsTrain, ...) standardGeneric("coxphTrainInterface")) |
|
73 |
- |
|
74 |
-#' @rdname coxphInterface |
|
75 |
-#' @export |
|
76 |
-setMethod("coxphTrainInterface", "matrix", function(measurementsTrain, survivalTrain, ...) |
|
77 |
-{ |
|
78 |
- coxphTrainInterface(S4Vectors::DataFrame(measurementsTrain, check.names = FALSE), survivalTrain, ...) |
|
79 |
-}) |
|
80 |
- |
|
81 |
-# Clinical data or one of the other inputs, transformed. |
|
82 |
-#' @rdname coxphInterface |
|
83 |
-#' @export |
|
84 |
-setMethod("coxphTrainInterface", "DataFrame", function(measurementsTrain, survivalTrain, ..., verbose = 3) |
|
1 |
+# An Interface for survival Package's coxph Function. The standard Cox proportional hazards. |
|
2 |
+coxphTrainInterface <- function(measurementsTrain, survivalTrain, ..., verbose = 3) |
|
85 | 3 |
{ |
86 | 4 |
if(!requireNamespace("survival", quietly = TRUE)) |
87 | 5 |
stop("The package 'survival' could not be found. Please install it.") |
88 | 6 |
if(verbose == 3) |
89 | 7 |
message("Fitting coxph classifier to training data and making predictions on test |
90 | 8 |
data.") |
91 |
- |
|
92 |
- splitDataset <- .splitDataAndOutcome(measurementsTrain, survivalTrain) |
|
93 |
- survivalTrain <- splitDataset[["outcome"]] |
|
94 |
- measurementsTrain <- splitDataset[["measurements"]] |
|
95 | 9 |
|
96 | 10 |
survival::coxph(survivalTrain ~ ., measurementsTrain) |
97 |
-}) |
|
98 |
- |
|
99 |
-#' @rdname coxphInterface |
|
100 |
-#' @export |
|
101 |
-setMethod("coxphTrainInterface", "MultiAssayExperiment", function(measurementsTrain, targets = names(measurementsTrain), survivalTrain, ...) |
|
102 |
-{ |
|
103 |
- tablesAndSurvival <- .MAEtoWideTable(measurementsTrain, targets, survivalTrain, restrict = NULL) |
|
104 |
- measurementsTrain <- tablesAndSurvival[["dataTable"]] |
|
105 |
- survivalTrain <- tablesAndSurvival[["outcome"]] |
|
106 |
- |
|
107 |
- coxphTrainInterface(measurementsTrain, survivalTrain, ...) |
|
108 |
-}) |
|
109 |
- |
|
11 |
+} |
|
12 |
+attr(coxphTrainInterface, "name") <- "coxphTrainInterface" |
|
110 | 13 |
|
111 |
- |
|
112 |
-################################################################################ |
|
113 |
-# |
|
114 |
-# Predict Interface |
|
115 |
-# |
|
116 |
-################################################################################ |
|
117 |
- |
|
118 |
- |
|
119 |
-#' @rdname coxphInterface |
|
120 |
-#' @usage NULL |
|
121 |
-#' @export |
|
122 |
-setGeneric("coxphPredictInterface", function(model, measurementsTest, ...) |
|
123 |
- standardGeneric("coxphPredictInterface")) |
|
124 |
- |
|
125 |
-#' @rdname coxphInterface |
|
126 |
-#' @export |
|
127 |
-setMethod("coxphPredictInterface", c("coxph", "matrix"), # Matrix of numeric measurements. |
|
128 |
- function(model, measurementsTest, ...) |
|
129 |
-{ |
|
130 |
- coxphPredictInterface(model, S4Vectors::DataFrame(measurementsTest, check.names = FALSE), ...) |
|
131 |
-}) |
|
132 |
- |
|
133 |
-#' @rdname coxphInterface |
|
134 |
-#' @export |
|
135 |
-setMethod("coxphPredictInterface", c("coxph", "DataFrame"), |
|
136 |
-function(model, measurementsTest, ..., verbose = 3) |
|
14 |
+# model is of class coxph. |
|
15 |
+coxphPredictInterface <- function(model, measurementsTest, ..., verbose = 3) |
|
137 | 16 |
{ |
138 | 17 |
predict(model, as.data.frame(measurementsTest), type = "risk") |
139 |
-}) |
|
140 |
- |
|
141 |
-# One or more omics data sets, possibly with clinical data. |
|
142 |
-#' @rdname coxphInterface |
|
143 |
-#' @export |
|
144 |
-setMethod("coxphPredictInterface", c("coxph", "MultiAssayExperiment"), |
|
145 |
- function(model, measurementsTest, targets = names(measurementsTest), ...) |
|
146 |
-{ |
|
147 |
- testingTable <- .MAEtoWideTable(measurementsTest, targets) |
|
148 |
- coxphPredictInterface(model, testingTable, ...) |
|
149 |
-}) |
|
150 |
- |
|
151 |
- |
|
18 |
+} |
|
152 | 19 |
\ No newline at end of file |
... | ... |
@@ -1,168 +1,29 @@ |
1 |
-################################################################################ |
|
2 |
-# |
|
3 |
-# Train Interface |
|
4 |
-# |
|
5 |
-################################################################################ |
|
1 |
+# An Interface for glmnet Package's coxnet Function. Survival modelling with sparsity. |
|
6 | 2 |
|
7 |
- |
|
8 |
- |
|
9 |
- |
|
10 |
-#' An Interface for glmnet Package's coxnet Function |
|
11 |
-#' |
|
12 |
-#' An elastic net GLM classifier uses a penalty which is a combination of a |
|
13 |
-#' lasso penalty and a ridge penalty, scaled by a lambda value, to fit a sparse |
|
14 |
-#' linear model to the data. |
|
15 |
-#' |
|
16 |
-#' The value of the \code{family} parameter is fixed to \code{"cox"} so |
|
17 |
-#' that classification with survival is possible. |
|
18 |
-#' During classifier training, if more than one lambda value |
|
19 |
-#' is considered by specifying a vector of them as input or leaving the default |
|
20 |
-#' value of NULL, then the chosen value is determined based on classifier |
|
21 |
-#' resubstitution error rate. |
|
22 |
-#' |
|
23 |
-#' @aliases coxnetInterface coxnetTrainInterface |
|
24 |
-#' coxnetPredictInterface coxnetTrainInterface,matrix-method |
|
25 |
-#' coxnetTrainInterface,DataFrame-method |
|
26 |
-#' coxnetTrainInterface,MultiAssayExperiment-method |
|
27 |
-#' coxnetPredictInterface,multnet,matrix-method |
|
28 |
-#' coxnetPredictInterface,multnet,DataFrame-method |
|
29 |
-#' coxnetPredictInterface,multnet,MultiAssayExperiment-method |
|
30 |
-#' @param measurementsTrain Either a \code{\link{matrix}}, \code{\link{DataFrame}} |
|
31 |
-#' or \code{\link{MultiAssayExperiment}} containing the training data. For a |
|
32 |
-#' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns are features. |
|
33 |
-#' @param survivalTrain A tabular data type of survival information of the |
|
34 |
-#' same number of rows as the number of samples in \code{measurementsTrain} and 2 to 3 columns if it is a |
|
35 |
-#' \code{\link{matrix}} or a \code{\link{DataFrame}}, or a character vector of length 2 to 3 containing the |
|
36 |
-#' column names in \code{measurementsTrain} if it is a \code{\link{DataFrame}} or the |
|
37 |
-#' column name in \code{colData(measurementsTrain)} if \code{measurementsTrain} is a |
|
38 |
-#' \code{\link{MultiAssayExperiment}}. If a vector of column names, those columns will be |
|
39 |
-#' removed before training. |
|
40 |
-#' @param lambda The lambda value passed directly to |
|
41 |
-#' \code{\link[glmnet]{glmnet}} if the training function is used or passed as |
|
42 |
-#' \code{s} to \code{\link[glmnet]{predict.glmnet}} if the prediction function |
|
43 |
-#' is used. |
|
44 |
-#' @param measurementsTest An object of the same class as \code{measurementsTrain} with no |
|
45 |
-#' samples in common with \code{measurements} and the same number of features |
|
46 |
-#' as it. |
|
47 |
-#' @param targets If \code{measurements} is a \code{MultiAssayExperiment}, the |
|
48 |
-#' names of the data tables to be used. \code{"clinical"} is also a valid value |
|
49 |
-#' and specifies that integer variables from the clinical data table will be |
|
50 |
-#' used. |
|
51 |
-#' @param ... Variables not used by the \code{matrix} nor the |
|
52 |
-#' \code{MultiAssayExperiment} method which are passed into and used by the |
|
53 |
-#' \code{DataFrame} method (e.g. \code{verbose}) or, for the training function, |
|
54 |
-#' options that are used by the \code{glmnet} function. For the testing |
|
55 |
-#' function, this variable simply contains any parameters passed from the |
|
56 |
-#' classification framework to it which aren't used by glmnet's \code{predict} |
|
57 |
-#' fuction. |
|
58 |
-#' @param model A trained coxnet, as created by the \code{glmnet} |
|
59 |
-#' function. |
|
60 |
-#' @param survivalTest A \code{\link{Surv}} object or columns from the \code{measurementsTest} table |
|
61 |
-#' which contains the follow-up time and status information. |
|
62 |
-#' @param verbose Default: 3. A number between 0 and 3 for the amount of |
|
63 |
-#' progress messages to give. This function only prints progress messages if |
|
64 |
-#' the value is 3. |
|
65 |
-#' @return For \code{coxnetTrainInterface}, an object of type |
|
66 |
-#' \code{glmnet}. For \code{coxnetPredictInterface}, a vector of relative risks. |
|
67 |
-#' @examples |
|
68 |
-#' if(require(glmnet)) |
|
69 |
-#' { |
|
70 |
-#' set.seed(51773) |
|
71 |
-#' proteinMatrix <- matrix(rnorm(20*10), nrow = 20, ncol = 10) |
|
72 |
-#' survivalOutcome <- Surv(time = rpois(20,20), event = rbinom(20, 1, 0.2)) |
|
73 |
-#' |
|
74 |
-#' trained <- coxnetTrainInterface(proteinMatrix, |
|
75 |
-#' survivalOutcome) |
|
76 |
-#' |
|
77 |
-#' # Resubstituting training data |
|
78 |
-#' predicted <- coxnetPredictInterface(trained, proteinMatrix) |
|
79 |
-#' |
|
80 |
-#' } |
|
81 |
-#' @rdname coxnetInterface |
|
82 |
-#' @export |
|
83 |
-setGeneric("coxnetTrainInterface", function(measurementsTrain, ...) |
|
84 |
- standardGeneric("coxnetTrainInterface")) |
|
85 |
- |
|
86 |
-#' @rdname coxnetInterface |
|
87 |
-#' @export |
|
88 |
-setMethod("coxnetTrainInterface", "matrix", # Matrix of numeric measurements. |
|
89 |
- function(measurementsTrain, survivalTrain, ...) |
|
90 |
- { |
|
91 |
- coxnetTrainInterface(S4Vectors::DataFrame(measurementsTrain, check.names = FALSE), survivalTrain, ...) |
|
92 |
- }) |
|
93 |
- |
|
94 |
-# Clinical data or one of the other inputs, transformed. |
|
95 |
-#' @rdname coxnetInterface |
|
96 |
-#' @export |
|
97 |
-setMethod("coxnetTrainInterface", "DataFrame", function(measurementsTrain, survivalTrain, lambda = NULL, ..., verbose = 3) |
|
3 |
+coxnetTrainInterface <- function(measurementsTrain, survivalTrain, lambda = NULL, ..., verbose = 3) |
|
98 | 4 |
{ |
99 | 5 |
if(!requireNamespace("glmnet", quietly = TRUE)) |
100 | 6 |
stop("The package 'glmnet' could not be found. Please install it.") |
101 | 7 |
if(verbose == 3) |
102 | 8 |
message("Fitting coxnet model to data.") |
103 |
- |
|
104 |
- splitDataset <- .splitDataAndOutcome(measurementsTrain, survivalTrain) |
|
105 |
- measurementsTrain <- data.frame(splitDataset[["measurements"]], check.names = FALSE) |
|
9 |
+ |
|
10 |
+ measurementsTrain <- data.frame(measurementsTrain, check.names = FALSE) |
|
106 | 11 |
measurementsMatrix <- glmnet::makeX(as(measurementsTrain, "data.frame")) |
107 | 12 |
|
108 | 13 |
# The response variable is a Surv class of object. |
109 |
- fit <- glmnet::cv.glmnet(measurementsMatrix, splitDataset[["outcome"]], family = "cox", type = "C", ...) |
|
14 |
+ fit <- glmnet::cv.glmnet(measurementsMatrix, survivalTrain, family = "cox", type = "C", ...) |
|
110 | 15 |
fitted <- fit$glmnet.fit |
111 | 16 |
|
112 | 17 |
offset <- -mean(predict(fitted, measurementsMatrix, s = fit$lambda.min, type = "link")) |
113 | 18 |
attr(fitted, "tune") <- list(lambda = fit$lambda.min, offset = offset) |
114 | 19 |
|
115 | 20 |
fitted |
116 |
-}) |
|
117 |
- |
|
118 |
-# One or more omics datasets, possibly with clinical data. |
|
119 |
-#' @rdname coxnetTrainInterface |
|
120 |
-#' @export |
|
121 |
-setMethod("coxnetTrainInterface", "MultiAssayExperiment", |
|
122 |
- function(measurementsTrain, targets = names(measurementsTrain), survivalTrain, ...) |
|
123 |
- { |
|
124 |
- tablesAndClasses <- .MAEtoWideTable(measurementsTrain, targets, survivalTrain) |
|
125 |
- measurementsTrain <- tablesAndClasses[["dataTable"]] |
|
126 |
- survivalTrain <- tablesAndClasses[["outcome"]] |
|
127 |
- |
|
128 |
- if(ncol(measurementsTrain) == 0) |
|
129 |
- stop("No variables in data tables specified by \'targets\' are numeric.") |
|
130 |
- else |
|
131 |
- coxnetTrainInterface(measurementsTrain, survivalTrain, ...) |
|
132 |
- }) |
|
133 |
- |
|
134 |
- |
|
21 |
+} |
|
22 |
+attr(coxnetTrainInterface, "name") <- "coxnetTrainInterface" |
|
135 | 23 |
|
136 |
-################################################################################ |
|
137 |
-# |
|
138 |
-# Predict Interface |
|
139 |
-# |
|
140 |
-################################################################################ |
|
141 |
- |
|
142 |
-# Matrix of numeric measurements. |
|
143 |
-#' @rdname coxnetInterface |
|
144 |
-#' @export |
|
145 |
-setGeneric("coxnetPredictInterface", function(model, measurementsTest, ...) |
|
146 |
- standardGeneric("coxnetPredictInterface")) |
|
147 |
- |
|
148 |
-#' @rdname coxnetInterface |
|
149 |
-#' @export |
|
150 |
-setMethod("coxnetPredictInterface", c("coxnet", "matrix"), |
|
151 |
- function(model, measurementsTest, ...) |
|
152 |
- { |
|
153 |
- coxnetPredictInterface(model, S4Vectors::DataFrame(measurementsTest, check.names = FALSE), ...) |
|
154 |
- }) |
|
155 |
- |
|
156 |
-#' @rdname coxnetInterface |
|
157 |
-#' @export |
|
158 |
-setMethod("coxnetPredictInterface", c("coxnet", "DataFrame"), function(model, measurementsTest, survivalTest = NULL, lambda, ..., verbose = 3) |
|
24 |
+# model is of class coxnet. |
|
25 |
+coxnetPredictInterface <- function(model, measurementsTest, survivalTest = NULL, lambda, ..., verbose = 3) |
|
159 | 26 |
{ # ... just consumes emitted tuning variables from .doTrain which are unused. |
160 |
- if(!is.null(survivalTest)) |
|
161 |
- { |
|
162 |
- splitDataset <- .splitDataAndOutcome(measurementsTest, survivalTest) # Remove any classes, if present. |
|
163 |
- measurementsTest <- splitDataset[["measurements"]] |
|
164 |
- } |
|
165 |
- |
|
166 | 27 |
if(!requireNamespace("glmnet", quietly = TRUE)) |
167 | 28 |
stop("The package 'glmnet' could not be found. Please install it.") |
168 | 29 |
if(verbose == 3) |
... | ... |
@@ -180,24 +41,4 @@ setMethod("coxnetPredictInterface", c("coxnet", "DataFrame"), function(model, me |
180 | 41 |
survScores <- predict(model, testMatrix, s = lambda, type = "response", newoffset = offset) |
181 | 42 |
|
182 | 43 |
survScores[, 1] |
183 |
-}) |
|
184 |
- |
|
185 |
-# One or more omics data sets, possibly with sample information data. |
|
186 |
-#' @rdname coxnetInterface |
|
187 |
-#' @export |
|
188 |
-setMethod("coxnetPredictInterface", c("coxnet", "MultiAssayExperiment"), |
|
189 |
- function(model, measurementsTest, targets = names(measurementsTest), ...) |
|
190 |
- { |
|
191 |
- tables <- .MAEtoWideTable(measurementsTest, targets) |
|
192 |
- measurementsTest <- tables[["dataTable"]] |
|
193 |
- |
|
194 |
- coxnetPredictInterface(model, measurementsTest, ...) |
|
195 |
- }) |
|
196 |
- |
|
197 |
- |
|
198 |
- |
|
199 |
-################################################################################ |
|
200 |
-# |
|
201 |
-# Get selected features |
|
202 |
-# |
|
203 |
-################################################################################ |
|
44 |
+} |
|
204 | 45 |
\ No newline at end of file |
... | ... |
@@ -1,91 +1,6 @@ |
1 |
-#' An Interface for sparsediscrim Package's dlda Function |
|
2 |
-#' |
|
3 |
-#' \code{DLDAtrainInterface} generates a trained diagonal LDA classifier and |
|
4 |
-#' \code{DLDApredictInterface} uses it to make predictions on a test data set. |
|
5 |
-#' |
|
6 |
-#' @name DLDA Interface |
|
7 |
-#' @aliases DLDAtrainInterface DLDAtrainInterface,matrix-method |
|
8 |
-#' DLDAtrainInterface,DataFrame-method |
|
9 |
-#' DLDAtrainInterface,MultiAssayExperiment-method DLDApredictInterface |
|
10 |
-#' DLDApredictInterface,dlda,matrix-method |
|
11 |
-#' DLDApredictInterface,dlda,DataFrame-method |
|
12 |
-#' DLDApredictInterface,dlda,MultiAssayExperiment-method |
|
13 |
-#' @param measurementsTrain Either a \code{\link{matrix}}, \code{\link{DataFrame}} |
|
14 |
-#' or \code{\link{MultiAssayExperiment}} containing the training data. For a |
|
15 |
-#' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns are features. |
|
16 |
-#' If of type \code{\link{DataFrame}} or \code{\link{MultiAssayExperiment}}, the data set is subset |
|
17 |
-#' to only those features of type \code{numeric}. |
|
18 |
-#' @param classesTrain A vector of class labels of class \code{\link{factor}} of the |
|
19 |
-#' same length as the number of samples in \code{measurementsTrain} if it is a |
|
20 |
-#' \code{\link{matrix}} or a \code{\link{DataFrame}} or a character vector of length 1 |
|
21 |
-#' containing the column name in \code{measurementsTrain} if it is a \code{\link{DataFrame}} or the |
|
22 |
-#' column name in \code{colData(measurementsTrain)} if \code{measurementsTrain} is a |
|
23 |
-#' \code{\link{MultiAssayExperiment}}. If a column name, that column will be |
|
24 |
-#' removed before training. |
|
25 |
-#' @param model A fitted model as returned by \code{DLDAtrainInterface}. |
|
26 |
-#' @param measurementsTest An object of the same class as \code{measurementsTrain} with no |
|
27 |
-#' samples in common with \code{measurementsTrain} and the same number of features |
|
28 |
-#' as it. Also, if a \code{DataFrame}, the \code{class} column must be absent. |
|
29 |
-#' @param targets If \code{measurements} is a \code{MultiAssayExperiment}, the |
|
30 |
-#' names of the data tables to be used. \code{"sampleInfo"} is also a valid value |
|
31 |
-#' and specifies that integer variables from the sample information data table will be |
|
32 |
-#' used. |
|
33 |
-#' @param ... Variables not used by the \code{matrix} nor the |
|
34 |
-#' \code{MultiAssayExperiment} method which are passed into and used by the |
|
35 |
-#' \code{DataFrame} method (e.g. \code{verbose}). |
|
36 |
-#' @param returnType Default: \code{"both"}. Either \code{"class"}, |
|
37 |
-#' \code{"score"} or \code{"both"}. Sets the return value from the prediction |
|
38 |
-#' to either a vector of class labels, matrix of scores for each class, or both |
|
39 |
-#' labels and scores in a \code{data.frame}. |
|
40 |
-#' @param verbose Default: 3. A number between 0 and 3 for the amount of |
|
41 |
-#' progress messages to give. This function only prints progress messages if |
|
42 |
-#' the value is 3. |
|
43 |
-#' @return For \code{DLDAtrainInterface}, a trained DLDA classifier. For |
|
44 |
-#' \code{DLDApredictInterface}, either a factor vector of predicted classes, a |
|
45 |
-#' matrix of scores for each class, or a table of both the class labels and |
|
46 |
-#' class scores, depending on the setting of \code{returnType}. |
|
47 |
-#' @author Dario Strbenac |
|
48 |
-#' @examples |
|
49 |
-#' |
|
50 |
-#' # if(require(sparsediscrim)) Package currently removed from CRAN. |
|
51 |
-#' #{ |
|
52 |
-#' # Genes 76 to 100 have differential expression. |
|
53 |
-#' genesMatrix <- sapply(1:100, function(sample) rnorm(25, 9, 0.3)) |
|
54 |
-#' genesMatrix <- rbind(genesMatrix, t(sapply(1:25, function(sample) |
|
55 |
-#' c(rnorm(75, 9, 0.3), rnorm(25, 14, 0.3))))) |
|
56 |
-#' classes <- factor(rep(c("Poor", "Good"), each = 25)) |
|
57 |
-#' rownames(genesMatrix) <- paste("Sample", 1:nrow(genesMatrix)) |
|
58 |
-#' colnames(genesMatrix) <- paste("Gene", 1:ncol(genesMatrix)) |
|
59 |
-#' selected <- colnames(genesMatrix)[91:100] |
|
60 |
-#' trainingSamples <- c(1:20, 26:45) |
|
61 |
-#' testingSamples <- c(21:25, 46:50) |
|
62 |
-#' |
|
63 |
-#' classifier <- DLDAtrainInterface(genesMatrix[trainingSamples, selected], |
|
64 |
-#' classes[trainingSamples]) |
|
65 |
-#' DLDApredictInterface(classifier, genesMatrix[testingSamples, selected]) |
|
66 |
-#' #} |
|
67 |
-#' |
|
68 |
-#' @include classes.R |
|
69 |
-#' @rdname DLDAinterface |
|
70 |
-#' @usage NULL |
|
71 |
-#' @export |
|
72 |
-setGeneric("DLDAtrainInterface", function(measurementsTrain, ...) standardGeneric("DLDAtrainInterface")) |
|
73 |
- |
|
74 |
-#' @rdname DLDAinterface |
|
75 |
-#' @export |
|
76 |
-setMethod("DLDAtrainInterface", "matrix", function(measurementsTrain, classesTrain, ...) # Matrix of numeric measurements. |
|
77 |
-{ |
|
78 |
- DLDAtrainInterface(S4Vectors::DataFrame(measurementsTrain, check.names = FALSE), classesTrain, ...) |
|
79 |
-}) |
|
80 |
- |
|
81 |
-#' @rdname DLDAinterface |
|
82 |
-#' @export |
|
83 |
-setMethod("DLDAtrainInterface", "DataFrame", function(measurementsTrain, classesTrain, verbose = 3) |
|
1 |
+# An Interface for sparsediscrim Package's dlda Function. Diagonal linear discriminant analysis. |
|
2 |
+DLDAtrainInterface <- function(measurementsTrain, classesTrain, verbose = 3) |
|
84 | 3 |
{ |
85 |
- splitDataset <- .splitDataAndOutcome(measurementsTrain, classesTrain) |
|
86 |
- trainingMatrix <- as.matrix(splitDataset[["measurements"]]) # DLDA demands matrix input type. |
|
87 |
- classesTrain <- splitDataset[["outcome"]] |
|
88 |
- |
|
89 | 4 |
#if(!requireNamespace("sparsediscrim", quietly = TRUE)) |
90 | 5 |
#stop("The package 'sparsediscrim' could not be found. Please install it.") |
91 | 6 |
if(verbose == 3) |
... | ... |
@@ -93,44 +8,19 @@ setMethod("DLDAtrainInterface", "DataFrame", function(measurementsTrain, classes |
93 | 8 |
|
94 | 9 |
# sparsediscrim::dlda(as.matrix(measurements), classes) |
95 | 10 |
.dlda(as.matrix(measurementsTrain), classesTrain) |
96 |
-}) |
|
11 |
+} |
|
12 |
+attr(DLDAtrainInterface, "name") <- "DLDAtrainInterface" |
|
97 | 13 |
|
98 |
-#' @rdname DLDAinterface |
|
99 |
-#' @export |
|
100 |
-setMethod("DLDAtrainInterface", "MultiAssayExperiment", function(measurementsTrain, targets = names(measurementsTrain), classesTrain, ...) |
|
101 |
-{ |
|
102 |
- tablesAndClasses <- .MAEtoWideTable(measurementsTrain, targets, classesTrain) |
|
103 |
- measurementsTrain <- tablesAndClasses[["dataTable"]] |
|
104 |
- classesTrain <- tablesAndClasses[["outcome"]] |
|
105 |
- |
|
106 |
- if(ncol(measurementsTrain) == 0) |
|
107 |
- stop("No variables in data tables specified by \'targets\' are numeric.") |
|
108 |
- else |
|
109 |
- DLDAtrainInterface(measurementsTrain, classesTrain, ...) |
|
110 |
-}) |
|
111 |
- |
|
112 |
- |
|
113 |
-#' @rdname DLDAinterface |
|
114 |
-#' @usage NULL |
|
115 |
-#' @export |
|
116 |
-setGeneric("DLDApredictInterface", function(model, measurementsTest, ...) standardGeneric("DLDApredictInterface")) |
|
117 |
- |
|
118 |
-#' @rdname DLDAinterface |
|
119 |
-#' @export |
|
120 |
-setMethod("DLDApredictInterface", c("dlda", "matrix"), function(model, measurementsTest, ...) |
|
121 |
-{ |
|
122 |
- DLDApredictInterface(model, S4Vectors::DataFrame(measurementsTest, check.names = FALSE), ...) |
|
123 |
-}) |
|
124 |
- |
|
125 |
-#' @rdname DLDAinterface |
|
126 |
-#' @export |
|
127 |
-setMethod("DLDApredictInterface", c("dlda", "DataFrame"), function(model, measurementsTest, returnType = c("both", "class", "score"), verbose = 3) |
|
14 |
+# model is of class dlda. |
|
15 |
+DLDApredictInterface <- function(model, measurementsTest, returnType = c("both", "class", "score"), |
|
16 |
+ verbose = 3) |
|
128 | 17 |
{ |
129 | 18 |
isNumeric <- sapply(measurementsTest, is.numeric) |
130 | 19 |
measurementsTest <- measurementsTest[, isNumeric, drop = FALSE] |
131 | 20 |
returnType <- match.arg(returnType) |
132 | 21 |
|
133 |
- # sparsediscrim doesn't match feature names to those inside trained model. Calculations could go wrong. |
|
22 |
+ # sparsediscrim doesn't match feature names to those inside trained model. |
|
23 |
+ # Ensure that there is no chance of mismatched columns. |
|
134 | 24 |
measurementsTest <- measurementsTest[, names(model[["var_pool"]])] |
135 | 25 |
|
136 | 26 |
#if(!requireNamespace("sparsediscrim", quietly = TRUE)) # Removed from CRAN, sadly. |
... | ... |
@@ -139,22 +29,9 @@ setMethod("DLDApredictInterface", c("dlda", "DataFrame"), function(model, measur |
139 | 29 |
message("Predicting classes using trained DLDA classifier.") |
140 | 30 |
|
141 | 31 |
#predict(model, as.matrix(test)) |
142 |
- predictions <- .predict(model, as.matrix(measurementsTest)) # Copy in utilities.R. |
|
32 |
+ predictions <- .predict(model, as.matrix(measurementsTest)) # Copy located in utilities.R. |
|
143 | 33 |
|
144 | 34 |
switch(returnType, class = predictions[["class"]], # Factor vector. |
145 | 35 |
score = predictions[["posterior"]][, model[["groups"]]], # Numeric matrix. |
146 | 36 |
both = data.frame(class = predictions[["class"]], predictions[["posterior"]], check.names = FALSE)) |
147 |
-}) |
|
148 |
- |
|
149 |
-#' @rdname DLDAinterface |
|
150 |
-#' @export |
|
151 |
-setMethod("DLDApredictInterface", c("dlda", "MultiAssayExperiment"), function(model, measurementsTest, targets = names(measurementsTest), ...) |
|
152 |
-{ |
|
153 |
- tablesAndClasses <- .MAEtoWideTable(measurementsTest, targets) |
|
154 |
- measurementsTest <- tablesAndClasses[["dataTable"]] |
|
155 |
- |
|
156 |
- if(ncol(measurementsTest) == 0) |
|
157 |
- stop("No variables in data tables specified by \'targets\' are numeric.") |
|
158 |
- else |
|
159 |
- DLDApredictInterface(model, measurementsTest, ...) |
|
160 |
-}) |
|
161 | 37 |
\ No newline at end of file |
38 |
+} |
|
162 | 39 |
\ No newline at end of file |
... | ... |
@@ -1,182 +1,31 @@ |
1 |
-################################################################################ |
|
2 |
-# |
|
3 |
-# Train Interface |
|
4 |
-# |
|
5 |
-################################################################################ |
|
6 |
- |
|
7 |
-#' An Interface for glmnet Package's glmnet Function |
|
8 |
-#' |
|
9 |
-#' An elastic net GLM classifier uses a penalty which is a combination of a |
|
10 |
-#' lasso penalty and a ridge penalty, scaled by a lambda value, to fit a sparse |
|
11 |
-#' linear model to the data. |
|
12 |
-#' |
|
13 |
-#' The value of the \code{family} parameter is fixed to \code{"multinomial"} so |
|
14 |
-#' that classification with more than 2 classes is possible. During classifier training, if more than one lambda value |
|
15 |
-#' is considered by specifying a vector of them as input or leaving the default |
|
16 |
-#' value of NULL, then the chosen value is determined based on classifier |
|
17 |
-#' resubstitution error rate. |
|
18 |
-#' |
|
19 |
-#' @aliases elasticNetGLMinterface elasticNetGLMtrainInterface |
|
20 |
-#' elasticNetGLMpredictInterface elasticNetGLMtrainInterface,matrix-method |
|
21 |
-#' elasticNetGLMtrainInterface,DataFrame-method |
|
22 |
-#' elasticNetGLMtrainInterface,MultiAssayExperiment-method |
|
23 |
-#' elasticNetGLMpredictInterface,multnet,matrix-method |
|
24 |
-#' elasticNetGLMpredictInterface,multnet,DataFrame-method |
|
25 |
-#' elasticNetGLMpredictInterface,multnet,MultiAssayExperiment-method |
|
26 |
-#' @param measurementsTrain Either a \code{\link{matrix}}, \code{\link{DataFrame}} |
|
27 |
-#' or \code{\link{MultiAssayExperiment}} containing the training data. For a |
|
28 |
-#' \code{matrix} or \code{\link{DataFrame}}, the rows are samples, and the columns are features. |
|
29 |
-#' If of type \code{\link{DataFrame}} or \code{\link{MultiAssayExperiment}}, the data set is subset |
|
30 |
-#' to only those features of type \code{numeric}. |
|
31 |
-#' @param classesTrain A vector of class labels of class \code{\link{factor}} of the |
|
32 |
-#' same length as the number of samples in \code{measurementsTrain} if it is a |
|
33 |
-#' \code{\link{matrix}} or a \code{\link{DataFrame}} or a character vector of length 1 |
|
34 |
-#' containing the column name in \code{measurementsTrain} if it is a \code{\link{DataFrame}} or the |
|
35 |
-#' column name in \code{colData(measurementsTrain)} if \code{measurementsTrain} is a |
|
36 |
-#' \code{\link{MultiAssayExperiment}}. If a column name, that column will be |
|
37 |
-#' removed before training. |
|
38 |
-#' @param lambda The lambda value passed directly to |
|
39 |
-#' \code{\link[glmnet]{glmnet}} if the training function is used or passed as |
|
40 |
-#' \code{s} to \code{\link[glmnet]{predict.glmnet}} if the prediction function |
|
41 |
-#' is used. |
|
42 |
-#' @param measurementsTest An object of the same class as \code{measurementsTrain} with no |
|
43 |
-#' samples in common with \code{measurementsTrain} and the same number of features |
|
44 |
-#' as it. |
|
45 |
-#' @param targets If \code{measurementsTrain} is a \code{MultiAssayExperiment}, the |
|
46 |
-#' names of the data tables to be used. \code{"sampleInfo"} is also a valid value |
|
47 |
-#' and specifies that integer variables from the sample information data table will be |
|
48 |
-#' used. |
|
49 |
-#' @param ... Variables not used by the \code{matrix} nor the |
|
50 |
-#' \code{MultiAssayExperiment} method which are passed into and used by the |
|
51 |
-#' \code{DataFrame} method (e.g. \code{verbose}) or, for the training function, |
|
52 |
-#' options that are used by the \code{glmnet} function. For the testing |
|
53 |
-#' function, this variable simply contains any parameters passed from the |
|
54 |
-#' classification framework to it which aren't used by glmnet's \code{predict} |
|
55 |
-#' function. |
|
56 |
-#' @param model A trained elastic net GLM, as created by the \code{glmnet} |
|
57 |
-#' function. |
|
58 |
-#' @param returnType Default: \code{"both"}. Either \code{"class"}, |
|
59 |
-#' \code{"score"} or \code{"both"}. Sets the return value from the prediction |
|
60 |
-#' to either a vector of class labels, matrix of scores for each class, or both |
|
61 |
-#' labels and scores in a \code{data.frame}. |
|
62 |
-#' @param verbose Default: 3. A number between 0 and 3 for the amount of |
|
63 |
-#' progress messages to give. This function only prints progress messages if |
|
64 |
-#' the value is 3. |
|
65 |
-#' @return For \code{elasticNetGLMtrainInterface}, an object of type |
|
66 |
-#' \code{glmnet}. For \code{elasticNetGLMpredictInterface}, either a factor |
|
67 |
-#' vector of predicted classes, a matrix of scores for each class, or a table |
|
68 |
-#' of both the class labels and class scores, depending on the setting of |
|
69 |
-#' \code{returnType}. |
|
70 |
-#' @author Dario Strbenac |
|
71 |
-#' @examples |
|
72 |
-#' |
|
73 |
-#' if(require(glmnet)) |
|
74 |
-#' { |
|
75 |
-#' # Genes 76 to 100 have differential expression. |
|
76 |
-#' genesMatrix <- sapply(1:100, function(sample) rnorm(25, 9, 0.3)) |
|
77 |
-#' genesMatrix <- rbind(genesMatrix, t(sapply(1:25, function(sample) |
|
78 |
-#' c(rnorm(75, 9, 0.3), rnorm(25, 14, 0.3))))) |
|
79 |
-#' classes <- factor(rep(c("Poor", "Good"), each = 25)) |
|
80 |
-#' rownames(genesMatrix) <- paste("Sample", 1:nrow(genesMatrix)) |
|
81 |
-#' colnames(genesMatrix) <- paste("Gene", 1:ncol(genesMatrix)) |
|
82 |
-#' |
|
83 |
-#' CVparams <- CrossValParams("k-Fold") |
|
84 |
-#' |
|
85 |
-#' trainParams <- TrainParams(elasticNetGLMtrainInterface, nlambda = 500) |
|
86 |
-#' predictParams <- PredictParams(elasticNetGLMpredictInterface) |
|
87 |
-#' modParams <- ModellingParams(selectParams = NULL, trainParams = trainParams, |
|
88 |
-#' predictParams = predictParams) |
|
89 |
-#' classified <- runTests(genesMatrix, classes, CVparams, modParams) |
|
90 |
-#' |
|
91 |
-#' classified <- calcCVperformance(classified, "Balanced Error") |
|
92 |
-#' head(tunedParameters(classified)) |
|
93 |
-#' performance(classified) |
|
94 |
-#' } |
|
95 |
-#' @rdname elasticNetGLM |
|
96 |
-#' @export |
|
97 |
-setGeneric("elasticNetGLMtrainInterface", function(measurementsTrain, ...) |
|
98 |
-standardGeneric("elasticNetGLMtrainInterface")) |
|
99 |
- |
|
100 |
-#' @rdname elasticNetGLM |
|
101 |
-#' @export |
|
102 |
-setMethod("elasticNetGLMtrainInterface", "matrix", # Matrix of numeric measurements. |
|
103 |
- function(measurementsTrain, classesTrain, ...) |
|
104 |
-{ |
|
105 |
- elasticNetGLMtrainInterface(S4Vectors::DataFrame(measurementsTrain, check.names = FALSE), classesTrain, ...) |
|
106 |
-}) |
|
1 |
+# An Interface for glmnet Package's glmnet Function. Generalised linear models with sparsity. |
|
107 | 2 |
|
108 |
-# Sample information data or one of the other inputs, transformed. |
|
109 |
-#' @rdname elasticNetGLM |
|
110 |
-#' @seealso \code{\link{elasticNetFeatures}} for a function used to extract the features |
|
111 |
-#' with non-zero coefficients from the model. |
|
112 |
-#' @export |
|
113 |
-setMethod("elasticNetGLMtrainInterface", "DataFrame", function(measurementsTrain, classesTrain, lambda = NULL, ..., verbose = 3) |
|
3 |
+elasticNetGLMtrainInterface <- function(measurementsTrain, classesTrain, lambda = NULL, ..., verbose = 3) |
|
114 |