... | ... |
@@ -409,8 +409,8 @@ TPP_importCheckConfigTable <- function(infoTable, type = "2D"){ |
409 | 409 |
} |
410 | 410 |
else if (length(which(!infoTable$RefCol %in% labelColsNew)) != |
411 | 411 |
0) { |
412 |
- stop("Labels in reference column not found", |
|
413 |
- " in any of the label columns.") |
|
412 |
+ stop(paste("Labels in reference column not found", |
|
413 |
+ "in any of the label columns.")) |
|
414 | 414 |
} |
415 | 415 |
hasCompoundCol <- any(allCols == "Compound") |
416 | 416 |
if (!hasCompoundCol) { |
... | ... |
@@ -669,7 +669,7 @@ filterOutContaminants <- function(dataLong){ |
669 | 669 |
na.rm = TRUE)){ |
670 | 670 |
message("Recomputing ratios!") |
671 | 671 |
dataOut <- dataLong %>% |
672 |
- dplyr::group_by_(idVar, "temperature") %>% |
|
672 |
+ group_by(.dots = c(idVar, "temperature")) %>% |
|
673 | 673 |
mutate(rel_value = rel_value/rel_value[label == RefCol]) %>% |
674 | 674 |
ungroup %>% |
675 | 675 |
filter(!is.na(raw_value)) %>% |
... | ... |
@@ -410,7 +410,7 @@ TPP_importCheckConfigTable <- function(infoTable, type = "2D"){ |
410 | 410 |
else if (length(which(!infoTable$RefCol %in% labelColsNew)) != |
411 | 411 |
0) { |
412 | 412 |
stop("Labels in reference column not found", |
413 |
- "in any of teh label columns.") |
|
413 |
+ " in any of the label columns.") |
|
414 | 414 |
} |
415 | 415 |
hasCompoundCol <- any(allCols == "Compound") |
416 | 416 |
if (!hasCompoundCol) { |
... | ... |
@@ -627,7 +627,7 @@ annotateDataList <- function(dataList, geneNameVar, configLong, |
627 | 627 |
conc <- unique_ID <- spread_var <- NULL |
628 | 628 |
|
629 | 629 |
combinedTab <- bind_rows(lapply(dataList, function(dat){ |
630 |
- datLong <- dat %>% tbl_df() %>% |
|
630 |
+ datLong <- dat %>% as_tibble() %>% |
|
631 | 631 |
gather(channel, signal, matches(intensityStr), matches(fcStr)) %>% |
632 | 632 |
mutate(label = gsub(fcStr, "", gsub(intensityStr, "", channel))) %>% |
633 | 633 |
left_join(configLong %>% |
... | ... |
@@ -741,8 +741,8 @@ filterOutContaminants <- function(dataLong){ |
741 | 741 |
renameColumns <- function(dataLong, idVar, geneNameVar){ |
742 | 742 |
clustername <- representative <- NULL |
743 | 743 |
|
744 |
- dplyr::rename_(dataLong, "representative" = idVar, |
|
745 |
- "clustername" = geneNameVar) %>% |
|
744 |
+ dplyr::rename(dataLong, "representative" = idVar, |
|
745 |
+ "clustername" = geneNameVar) %>% |
|
746 | 746 |
group_by(clustername) %>% |
747 | 747 |
mutate(representative = |
748 | 748 |
.paste_rmNA(unique(unlist(strsplit(representative, |
... | ... |
@@ -66,13 +66,14 @@ |
66 | 66 |
return(data) |
67 | 67 |
} |
68 | 68 |
|
69 |
+#' @import dplyr |
|
69 | 70 |
.TPP_importFct_removeDuplicates <- function(inDF, refColName, |
70 | 71 |
nonNAColNames, qualColName){ |
71 | 72 |
# internal function copied from TPP package to avoid |
72 | 73 |
# import of non-exported package functions |
73 | 74 |
message("Removing duplicate identifiers using quality column '", |
74 | 75 |
qualColName, "'...") |
75 |
- nonUniques <- unique(inDF[duplicated(inDF[[refColName]]), |
|
76 |
+ nonUniques <- unique(as_tibble(inDF)[duplicated(inDF[[refColName]]), |
|
76 | 77 |
refColName]) |
77 | 78 |
retDF <- subset(inDF, !(get(refColName) %in% nonUniques)) |
78 | 79 |
if(nrow(nonUniques)){ |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){ |
|
1 |
+.TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){ |
|
2 | 2 |
# internal function copied from TPP package to avoid |
3 | 3 |
# import of non-exported package functions |
4 | 4 |
. <- NULL |
... | ... |
@@ -47,7 +47,7 @@ TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){ |
47 | 47 |
|
48 | 48 |
#' @importFrom utils read.delim |
49 | 49 |
#' @importFrom RCurl url.exists |
50 |
-TPP_importFct_readFiles <- function (files, naStrs){ |
|
50 |
+.TPP_importFct_readFiles <- function (files, naStrs){ |
|
51 | 51 |
# internal function copied from TPP package to avoid |
52 | 52 |
# import of non-exported package functions |
53 | 53 |
expNames <- names(files) |
... | ... |
@@ -66,51 +66,53 @@ TPP_importFct_readFiles <- function (files, naStrs){ |
66 | 66 |
return(data) |
67 | 67 |
} |
68 | 68 |
|
69 |
-TPP_importFct_removeDuplicates <- function(inDF, refColName, |
|
69 |
+.TPP_importFct_removeDuplicates <- function(inDF, refColName, |
|
70 | 70 |
nonNAColNames, qualColName){ |
71 | 71 |
# internal function copied from TPP package to avoid |
72 | 72 |
# import of non-exported package functions |
73 | 73 |
message("Removing duplicate identifiers using quality column '", |
74 | 74 |
qualColName, "'...") |
75 |
- nonUniques = unique(inDF[duplicated(inDF[[refColName]]), |
|
75 |
+ nonUniques <- unique(inDF[duplicated(inDF[[refColName]]), |
|
76 | 76 |
refColName]) |
77 |
- retDF = subset(inDF, !(get(refColName) %in% nonUniques)) |
|
78 |
- for (nU in nonUniques) { |
|
79 |
- tmpDF = subset(inDF, get(refColName) == nU) |
|
80 |
- nonNArows = NULL |
|
81 |
- for (r in seq_len(nrow(tmpDF))) { |
|
82 |
- if (any(!is.na(tmpDF[r, nonNAColNames]))) { |
|
83 |
- nonNArows = c(nonNArows, r) |
|
84 |
- } |
|
85 |
- } |
|
86 |
- if (length(nonNArows) > 1) { |
|
87 |
- if (is.null(qualColName)) { |
|
88 |
- useRow = 1 |
|
89 |
- } |
|
90 |
- else { |
|
91 |
- qualVals = tmpDF[nonNArows, qualColName] |
|
92 |
- useRow = match(max(qualVals), qualVals) |
|
77 |
+ retDF <- subset(inDF, !(get(refColName) %in% nonUniques)) |
|
78 |
+ if(nrow(nonUniques)){ |
|
79 |
+ for (nU in nonUniques) { |
|
80 |
+ tmpDF <- subset(inDF, get(refColName) == nU) |
|
81 |
+ nonNArows <- NULL |
|
82 |
+ for (r in seq_len(nrow(tmpDF))) { |
|
83 |
+ if (any(!is.na(tmpDF[r, nonNAColNames]))) { |
|
84 |
+ nonNArows <- c(nonNArows, r) |
|
85 |
+ } |
|
86 |
+ } |
|
87 |
+ if (length(nonNArows) > 1) { |
|
88 |
+ if (is.null(qualColName)) { |
|
89 |
+ useRow <- 1 |
|
90 |
+ } |
|
91 |
+ else { |
|
92 |
+ qualVals <- tmpDF[nonNArows, qualColName] |
|
93 |
+ useRow <- match(max(qualVals), qualVals) |
|
94 |
+ } |
|
95 |
+ } |
|
96 |
+ else { |
|
97 |
+ useRow <- nonNArows[1] |
|
98 |
+ } |
|
99 |
+ retDF <- rbind(retDF, tmpDF[useRow, ]) |
|
93 | 100 |
} |
94 |
- } |
|
95 |
- else { |
|
96 |
- useRow = nonNArows[1] |
|
97 |
- } |
|
98 |
- retDF = rbind(retDF, tmpDF[useRow, ]) |
|
99 | 101 |
} |
100 | 102 |
message(nrow(retDF), " out of ", nrow(inDF), |
101 | 103 |
" rows kept for further analysis.") |
102 | 104 |
return(retDF) |
103 | 105 |
} |
104 | 106 |
|
105 |
-TPP_replaceZeros <- function(x){ |
|
107 |
+.TPP_replaceZeros <- function(x){ |
|
106 | 108 |
# internal function copied from TPP package to avoid |
107 | 109 |
# import of non-exported package functions |
108 | 110 |
x[which(x == 0)] <- NA |
109 | 111 |
return(x) |
110 | 112 |
} |
111 | 113 |
|
112 |
-TPP_importFct_rmZeroSias <- function(configTable, data.list, |
|
113 |
- intensityStr){ |
|
114 |
+.TPP_importFct_rmZeroSias <- function(data.list, |
|
115 |
+ intensityStr){ |
|
114 | 116 |
# internal function copied from TPP package to avoid |
115 | 117 |
# import of non-exported package functions |
116 | 118 |
out <- lapply(names(data.list), function(l.name) { |
... | ... |
@@ -119,7 +121,7 @@ TPP_importFct_rmZeroSias <- function(configTable, data.list, |
119 | 121 |
intensity.cols <- grep(intensityStr, colsTmp, value = TRUE) |
120 | 122 |
intensity.df <- subset(datTmp, select = intensity.cols) %>% |
121 | 123 |
mutate_all(as.character) %>% mutate_all(as.numeric) |
122 |
- new.intensity.df <- intensity.df %>% mutate_all(TPP_replaceZeros) |
|
124 |
+ new.intensity.df <- intensity.df %>% mutate_all(.TPP_replaceZeros) |
|
123 | 125 |
datTmp[, intensity.cols] <- new.intensity.df |
124 | 126 |
return(datTmp) |
125 | 127 |
}) |
... | ... |
@@ -127,7 +129,7 @@ TPP_importFct_rmZeroSias <- function(configTable, data.list, |
127 | 129 |
return(out) |
128 | 130 |
} |
129 | 131 |
|
130 |
-TPP_importFct_checkExperimentCol <- function(expCol){ |
|
132 |
+.TPP_importFct_checkExperimentCol <- function(expCol){ |
|
131 | 133 |
# internal function copied from TPP package to avoid |
132 | 134 |
# import of non-exported package functions |
133 | 135 |
if (is.null(expCol)) { |
... | ... |
@@ -149,7 +151,7 @@ TPP_importFct_checkExperimentCol <- function(expCol){ |
149 | 151 |
return(newExpNames) |
150 | 152 |
} |
151 | 153 |
|
152 |
-TPP_importFct_checkComparisons <- function(confgTable){ |
|
154 |
+.TPP_importFct_checkComparisons <- function(confgTable){ |
|
153 | 155 |
# internal function copied from TPP package to avoid |
154 | 156 |
# import of non-exported package functions |
155 | 157 |
expConds <- confgTable$Condition |
... | ... |
@@ -167,7 +169,7 @@ TPP_importFct_checkComparisons <- function(confgTable){ |
167 | 169 |
} |
168 | 170 |
validCompCols <- compCols[!comp_unequal_two] |
169 | 171 |
allCompStrs <- c() |
170 |
- if (length(validCompCols) > 0) { |
|
172 |
+ if (length(validCompCols)) { |
|
171 | 173 |
message("Comparisons will be performed between the following experiments:") |
172 | 174 |
for (colName in validCompCols) { |
173 | 175 |
current_compEntries <- confgTable[[colName]] |
... | ... |
@@ -196,7 +198,7 @@ TPP_importFct_checkComparisons <- function(confgTable){ |
196 | 198 |
} |
197 | 199 |
|
198 | 200 |
#' @importFrom stringr str_to_title |
199 |
-TPP_importFct_checkConditions <- function(condInfo, |
|
201 |
+.TPP_importFct_checkConditions <- function(condInfo, |
|
200 | 202 |
expectedLength){ |
201 | 203 |
# internal function copied from TPP package to avoid |
202 | 204 |
# import of non-exported package functions |
... | ... |
@@ -227,7 +229,7 @@ TPP_importFct_checkConditions <- function(condInfo, |
227 | 229 |
return(condInfo) |
228 | 230 |
} |
229 | 231 |
|
230 |
-TPP_checkFunctionArgs <- |
|
232 |
+.TPP_checkFunctionArgs <- |
|
231 | 233 |
function(functionCall, expectedArguments){ |
232 | 234 |
# internal function copied from TPP package to avoid |
233 | 235 |
# import of non-exported package functions |
... | ... |
@@ -242,7 +244,7 @@ TPP_checkFunctionArgs <- |
242 | 244 |
}) |
243 | 245 |
} |
244 | 246 |
|
245 |
-TPP_nonLabelColumns <- function(){ |
|
247 |
+.TPP_nonLabelColumns <- function(){ |
|
246 | 248 |
# internal function copied from TPP package to avoid |
247 | 249 |
# import of non-exported package functions |
248 | 250 |
out <- data.frame( |
... | ... |
@@ -259,12 +261,12 @@ TPP_nonLabelColumns <- function(){ |
259 | 261 |
return(out) |
260 | 262 |
} |
261 | 263 |
|
262 |
-TPP_detectLabelColumnsInConfigTable <- |
|
264 |
+.TPP_detectLabelColumnsInConfigTable <- |
|
263 | 265 |
function(allColumns){ |
264 | 266 |
# internal function copied from TPP package to avoid |
265 | 267 |
# import of non-exported package functions |
266 |
- TPP_checkFunctionArgs(match.call(), c("allColumns")) |
|
267 |
- noLabelCols <- TPP_nonLabelColumns()$column %>% |
|
268 |
+ .TPP_checkFunctionArgs(match.call(), c("allColumns")) |
|
269 |
+ noLabelCols <- .TPP_nonLabelColumns()$column %>% |
|
268 | 270 |
as.character %>% |
269 | 271 |
unique |
270 | 272 |
compCols <- grep("comparison", allColumns, value = TRUE, |
... | ... |
@@ -274,7 +276,7 @@ TPP_detectLabelColumnsInConfigTable <- |
274 | 276 |
return(labelCols) |
275 | 277 |
} |
276 | 278 |
|
277 |
-TPP_importCheckTemperatures <- function(temp){ |
|
279 |
+.TPP_importCheckTemperatures <- function(temp){ |
|
278 | 280 |
# internal function copied from TPP package to avoid |
279 | 281 |
# import of non-exported package functions |
280 | 282 |
tempMatrix <- as.matrix(temp) |
... | ... |
@@ -290,7 +292,7 @@ TPP_importCheckTemperatures <- function(temp){ |
290 | 292 |
|
291 | 293 |
#' @importFrom openxlsx read.xlsx |
292 | 294 |
#' @importFrom utils read.table |
293 |
-TPP_importFct_readConfigTable <- function(cfg){ |
|
295 |
+.TPP_importFct_readConfigTable <- function(cfg){ |
|
294 | 296 |
# internal function copied from TPP package to avoid |
295 | 297 |
# import of non-exported package functions |
296 | 298 |
if (is.character(cfg)) { |
... | ... |
@@ -326,11 +328,23 @@ TPP_importFct_readConfigTable <- function(cfg){ |
326 | 328 |
return(cfg) |
327 | 329 |
} |
328 | 330 |
|
329 |
-TPP_importCheckConfigTable <- function (infoTable, type = "2D"){ |
|
330 |
- # internal function copied from TPP package to avoid |
|
331 |
- # import of non-exported package functions |
|
332 |
- TPP_checkFunctionArgs(match.call(), c("infoTable", "type")) |
|
333 |
- Experiment = Path = Compound <- NULL |
|
331 |
+#' Import and chech configuration table |
|
332 |
+#' |
|
333 |
+#' @param infoTable character string of a file path to |
|
334 |
+#' a config table (excel,txt or csv file) or data frame |
|
335 |
+#' containing a config table |
|
336 |
+#' @param type charater string indicating dataset type |
|
337 |
+#' default is 2D |
|
338 |
+#' |
|
339 |
+#' @return data frame with config table |
|
340 |
+#' |
|
341 |
+#' @examples |
|
342 |
+#' data("config_tab") |
|
343 |
+#' TPP_importCheckConfigTable(config_tab, type = "2D") |
|
344 |
+#' @export |
|
345 |
+TPP_importCheckConfigTable <- function(infoTable, type = "2D"){ |
|
346 |
+ .TPP_checkFunctionArgs(match.call(), c("infoTable", "type")) |
|
347 |
+ Experiment <- Path <- Compound <- NULL |
|
334 | 348 |
isValidDF <- FALSE |
335 | 349 |
if (is.data.frame(infoTable)) { |
336 | 350 |
if ((ncol(infoTable) > 1) & |
... | ... |
@@ -349,9 +363,9 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){ |
349 | 363 |
if (!isValidType) { |
350 | 364 |
stop("'type' must have this value: '2D'") |
351 | 365 |
} |
352 |
- infoTable <- TPP_importFct_readConfigTable(cfg = infoTable) |
|
366 |
+ infoTable <- .TPP_importFct_readConfigTable(cfg = infoTable) |
|
353 | 367 |
infoTable$Experiment <- |
354 |
- TPP_importFct_checkExperimentCol(infoTable$Experiment) |
|
368 |
+ .TPP_importFct_checkExperimentCol(infoTable$Experiment) |
|
355 | 369 |
infoTable <- subset(infoTable, Experiment != "") |
356 | 370 |
givenPaths <- NULL |
357 | 371 |
if (any("Path" %in% colnames(infoTable))) { |
... | ... |
@@ -366,7 +380,7 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){ |
366 | 380 |
compStrs <- NA |
367 | 381 |
infoTable$Condition <- NULL |
368 | 382 |
allCols <- colnames(infoTable) |
369 |
- labelCols <- TPP_detectLabelColumnsInConfigTable(allColumns = allCols) |
|
383 |
+ labelCols <- .TPP_detectLabelColumnsInConfigTable(allColumns = allCols) |
|
370 | 384 |
labelValues <- infoTable[, labelCols] |
371 | 385 |
labelValuesNum <- suppressWarnings(labelValues %>% apply(2, |
372 | 386 |
as.numeric)) |
... | ... |
@@ -412,7 +426,7 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){ |
412 | 426 |
} |
413 | 427 |
else { |
414 | 428 |
temperatures <- subset(infoTable, select = labelColsNew) |
415 |
- tempMatrix <- TPP_importCheckTemperatures(temp = temperatures) |
|
429 |
+ tempMatrix <- .TPP_importCheckTemperatures(temp = temperatures) |
|
416 | 430 |
infoList <- list( |
417 | 431 |
expNames = as.character(infoTable$Experiment), |
418 | 432 |
expCond = infoTable$Condition, files = givenPaths, |
... | ... |
@@ -423,6 +437,45 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){ |
423 | 437 |
return(out) |
424 | 438 |
} |
425 | 439 |
|
440 |
+#' Import 2D-TPP dataset main function |
|
441 |
+#' |
|
442 |
+#' @param configTable character string of a file path to a config table |
|
443 |
+#' @param data possible list of datasets from different MS runs |
|
444 |
+#' corresponding to a 2D-TPP dataset, circumvents loading datasets |
|
445 |
+#' referencend in config table, default is NULL |
|
446 |
+#' @param idVar character string indicating which data column provides the |
|
447 |
+#' unique identifiers for each protein. |
|
448 |
+#' @param intensityStr character string indicating which columns contain |
|
449 |
+#' raw intensities measurements |
|
450 |
+#' @param fcStr character string indicating which columns contain the actual |
|
451 |
+#' fold change values. Those column names containing the suffix \code{fcStr} |
|
452 |
+#' will be regarded as containing fold change values. |
|
453 |
+#' @param naStrs character vector indicating missing values in the data table. |
|
454 |
+#' When reading data from file, this value will be passed on to the argument |
|
455 |
+#' \code{na.strings} in function \code{read.delim}. |
|
456 |
+#' @param addCol character string indicating additional column to import |
|
457 |
+#' @param nonZeroCols column like default qssm that should be imported and |
|
458 |
+#' requested to be non-zero in analyzed data |
|
459 |
+#' @param qualColName character string indicating which column can be used for |
|
460 |
+#' additional quality criteria when deciding between different non-unique |
|
461 |
+#' protein identifiers. |
|
462 |
+#' |
|
463 |
+#' @return list of data frames containing different |
|
464 |
+#' datasets |
|
465 |
+#' |
|
466 |
+#' @examples |
|
467 |
+#' data("config_tab") |
|
468 |
+#' data("raw_dat_list") |
|
469 |
+#' dataList <- import2dMain(configTable = config_tab, |
|
470 |
+#' data = raw_dat_list, |
|
471 |
+#' idVar = "protein_id", |
|
472 |
+#' fcStr = "rel_fc_", |
|
473 |
+#' addCol = "gene_name", |
|
474 |
+#' naStrs = NA, |
|
475 |
+#' intensityStr = "signal_sum_", |
|
476 |
+#' nonZeroCols = "qusm", |
|
477 |
+#' qualColName = "qupm") |
|
478 |
+#' @export |
|
426 | 479 |
import2dMain <- function(configTable, data, idVar, fcStr, |
427 | 480 |
addCol, naStrs, intensityStr, |
428 | 481 |
qualColName, nonZeroCols){ |
... | ... |
@@ -434,20 +487,18 @@ import2dMain <- function(configTable, data, idVar, fcStr, |
434 | 487 |
files <- NULL |
435 | 488 |
} |
436 | 489 |
} |
437 |
- Experiment = Compound = Temperature = RefCol <- NULL |
|
490 |
+ Experiment <- Compound <- Temperature <- RefCol <- NULL |
|
438 | 491 |
expNames <- configTable$Experiment |
439 |
- argList <- TPP_importFct_CheckDataFormat(dataframes = data, |
|
492 |
+ argList <- .TPP_importFct_CheckDataFormat(dataframes = data, |
|
440 | 493 |
files = files, |
441 | 494 |
expNames = expNames) |
442 | 495 |
data <- argList[["dataframes"]] |
443 | 496 |
files <- argList[["files"]] |
444 | 497 |
if (!is.null(files)) { |
445 | 498 |
files2 <- files[!duplicated(names(files))] |
446 |
- data <- TPP_importFct_readFiles(files = files2, |
|
499 |
+ data <- .TPP_importFct_readFiles(files = files2, |
|
447 | 500 |
naStrs = naStrs) |
448 | 501 |
} |
449 |
- configTable %>% group_by(Experiment, Compound, |
|
450 |
- Temperature, RefCol) |
|
451 | 502 |
iVec <- seq_len(nrow(configTable)) |
452 | 503 |
dataList <- lapply(iVec, function(iTmp) { |
453 | 504 |
rowTmp <- configTable[iTmp, ] |
... | ... |
@@ -459,7 +510,7 @@ import2dMain <- function(configTable, data, idVar, fcStr, |
459 | 510 |
"RefCol", "Path", "Condition") |
460 | 511 |
allCols <- colnames(rowTmp) |
461 | 512 |
labelCols <- setdiff(allCols, noFCCols) |
462 |
- labelValues <- suppressMessages(rowTmp[, labelCols] %>% |
|
513 |
+ labelValues <- suppressWarnings(rowTmp[, labelCols] %>% |
|
463 | 514 |
as.numeric) |
464 | 515 |
labelColsNum <- labelCols[!is.na(labelValues)] |
465 | 516 |
signalCols <- paste(intensityStr, labelColsNum, sep = "") |
... | ... |
@@ -480,7 +531,7 @@ import2dMain <- function(configTable, data, idVar, fcStr, |
480 | 531 |
notFound, paste("'. Please check the suffices and the", |
481 | 532 |
"additional column names you have specified.")) |
482 | 533 |
} |
483 |
- dataFiltered <- TPP_importFct_removeDuplicates( |
|
534 |
+ dataFiltered <- .TPP_importFct_removeDuplicates( |
|
484 | 535 |
inDF = dataTmp,refColName = idVar, |
485 | 536 |
nonNAColNames = dataCols, |
486 | 537 |
qualColName = qualColName[1]) |
... | ... |
@@ -498,17 +549,24 @@ import2dMain <- function(configTable, data, idVar, fcStr, |
498 | 549 |
return(newName) |
499 | 550 |
}, "") |
500 | 551 |
names(dataList) <- newNames |
501 |
- out <- TPP_importFct_rmZeroSias(configTable = configTable, |
|
502 |
- data.list = dataList, |
|
503 |
- intensityStr = intensityStr) |
|
552 |
+ out <- .TPP_importFct_rmZeroSias(data.list = dataList, |
|
553 |
+ intensityStr = intensityStr) |
|
504 | 554 |
return(out) |
505 | 555 |
} |
506 | 556 |
|
557 |
+#' Tranform configuration table from wide to long |
|
558 |
+#' |
|
559 |
+#' @param configWide data frame containing a config table |
|
560 |
+#' @return data frame containing config table in long format |
|
561 |
+#' |
|
507 | 562 |
#' @importFrom tidyr gather |
563 |
+#' @examples |
|
564 |
+#' data("config_tab") |
|
565 |
+#' configWide2Long(configWide = config_tab) |
|
566 |
+#' |
|
567 |
+#' @export |
|
508 | 568 |
configWide2Long <- function(configWide){ |
509 |
- # internal function to tranform config table into long format |
|
510 |
- |
|
511 |
- Path <- label <- conc <- Compound <- Experiment <- |
|
569 |
+ Path <- label <- conc <- Compound <- Experiment <- |
|
512 | 570 |
Temperature <- RefCol <- NULL |
513 | 571 |
|
514 | 572 |
if(any(grepl("Path", colnames(configWide)))){ |
... | ... |
@@ -525,11 +583,45 @@ configWide2Long <- function(configWide){ |
525 | 583 |
} |
526 | 584 |
} |
527 | 585 |
|
586 |
+#' Annotate imported data list using a config table |
|
587 |
+#' @param dataList list of datasets from different MS runs |
|
588 |
+#' corresponding to a 2D-TPP dataset |
|
589 |
+#' @param geneNameVar character string of the column name that describes |
|
590 |
+#' the gene name of a given protein in the raw data files |
|
591 |
+#' @param configLong long formatted data frame of a corresponding |
|
592 |
+#' config table |
|
593 |
+#' @param intensityStr character string indicating which columns contain |
|
594 |
+#' raw intensities measurements |
|
595 |
+#' @param fcStr character string indicating which columns contain the actual |
|
596 |
+#' fold change values. Those column names containing the suffix \code{fcStr} |
|
597 |
+#' will be regarded as containing fold change values. |
|
598 |
+#' |
|
599 |
+#' @return data frame containing all data annotated |
|
600 |
+#' by information supplied in the config table |
|
601 |
+#' |
|
528 | 602 |
#' @importFrom tidyr spread |
603 |
+#' |
|
604 |
+#' @examples |
|
605 |
+#' data("config_tab") |
|
606 |
+#' data("raw_dat_list") |
|
607 |
+#' dataList <- import2dMain(configTable = config_tab, |
|
608 |
+#' data = raw_dat_list, |
|
609 |
+#' idVar = "protein_id", |
|
610 |
+#' fcStr = "rel_fc_", |
|
611 |
+#' addCol = "gene_name", |
|
612 |
+#' naStrs = NA, |
|
613 |
+#' intensityStr = "signal_sum_", |
|
614 |
+#' nonZeroCols = "qusm", |
|
615 |
+#' qualColName = "qupm") |
|
616 |
+#' configLong <- configWide2Long(configWide = config_tab) |
|
617 |
+#' annotateDataList(dataList = dataList, |
|
618 |
+#' geneNameVar = "gene_name", |
|
619 |
+#' configLong = configLong, |
|
620 |
+#' intensityStr = "signal_sum_", |
|
621 |
+#' fcStr = "rel_fc_") |
|
622 |
+#' @export |
|
529 | 623 |
annotateDataList <- function(dataList, geneNameVar, configLong, |
530 | 624 |
intensityStr, fcStr){ |
531 |
- # internal function to annotate list of 2D-TPP data subtables with |
|
532 |
- # information from config table |
|
533 | 625 |
channel <- signal <- Temperature <- RefCol <- label <- |
534 | 626 |
conc <- unique_ID <- spread_var <- NULL |
535 | 627 |
|
... | ... |
@@ -548,13 +640,25 @@ annotateDataList <- function(dataList, geneNameVar, configLong, |
548 | 640 |
return(combinedTab) |
549 | 641 |
} |
550 | 642 |
|
643 |
+#' Filter out contaminants |
|
644 |
+#' |
|
645 |
+#' @param dataLong long format data frame of imported dataset |
|
646 |
+#' |
|
647 |
+#' @return data frame containing full dataset filtered to |
|
648 |
+#' contain no contaminants |
|
649 |
+#' |
|
650 |
+#' @examples |
|
651 |
+#' data("simulated_cell_extract_df") |
|
652 |
+#' filterOutContaminants(simulated_cell_extract_df) |
|
653 |
+#' |
|
654 |
+#' @export |
|
551 | 655 |
filterOutContaminants <- function(dataLong){ |
552 | 656 |
# internal function to filter out contaminants |
553 | 657 |
representative <- NULL |
554 | 658 |
filter(dataLong, !grepl("##", representative)) |
555 | 659 |
} |
556 | 660 |
|
557 |
-checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){ |
|
661 |
+.checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){ |
|
558 | 662 |
# internal function to check that protein |
559 | 663 |
# fold changes are computed |
560 | 664 |
# relative to the correct TMT channel |
... | ... |
@@ -583,7 +687,7 @@ checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){ |
583 | 687 |
} |
584 | 688 |
|
585 | 689 |
#' @importFrom stats median |
586 |
-medianNormalizeRatios <- function(dataLong){ |
|
690 |
+.medianNormalizeRatios <- function(dataLong){ |
|
587 | 691 |
# internal function to perform median normalization |
588 | 692 |
# of ratios |
589 | 693 |
rel_value <- temperature <- conc <- |
... | ... |
@@ -599,17 +703,48 @@ medianNormalizeRatios <- function(dataLong){ |
599 | 703 |
return(dataOut) |
600 | 704 |
} |
601 | 705 |
|
706 |
+#' Rename columns of imported data frame |
|
707 |
+#' |
|
708 |
+#' @param dataLong long format data frame of imported dataset |
|
709 |
+#' @param idVar character string indicating which data column provides the |
|
710 |
+#' unique identifiers for each protein. |
|
711 |
+#' @param geneNameVar character string of the column name that describes |
|
712 |
+#' the gene name of a given protein in the raw data files |
|
713 |
+#' |
|
714 |
+#' @return data frame containing imported data with renamed |
|
715 |
+#' columns |
|
716 |
+#' |
|
717 |
+#' @examples |
|
718 |
+#' data("config_tab") |
|
719 |
+#' data("raw_dat_list") |
|
720 |
+#' |
|
721 |
+#' dataList <- import2dMain(configTable = config_tab, |
|
722 |
+#' data = raw_dat_list, |
|
723 |
+#' idVar = "protein_id", |
|
724 |
+#' fcStr = "rel_fc_", |
|
725 |
+#' addCol = "gene_name", |
|
726 |
+#' naStrs = NA, |
|
727 |
+#' intensityStr = "signal_sum_", |
|
728 |
+#' nonZeroCols = "qusm", |
|
729 |
+#' qualColName = "qupm") |
|
730 |
+#' configLong <- configWide2Long(configWide = config_tab) |
|
731 |
+#' annoDat <- annotateDataList(dataList = dataList, |
|
732 |
+#' geneNameVar = "gene_name", |
|
733 |
+#' configLong = configLong, |
|
734 |
+#' intensityStr = "signal_sum_", |
|
735 |
+#' fcStr = "rel_fc_") |
|
736 |
+#' renameColumns(annoDat, |
|
737 |
+#' idVar = "protein_id", |
|
738 |
+#' geneNameVar = "gene_name") |
|
739 |
+#' @export |
|
602 | 740 |
renameColumns <- function(dataLong, idVar, geneNameVar){ |
603 |
- # internal function to rename column names to |
|
604 |
- # match lazyeval variable |
|
605 |
- # names of main function |
|
606 | 741 |
clustername <- representative <- NULL |
607 | 742 |
|
608 | 743 |
dplyr::rename_(dataLong, "representative" = idVar, |
609 | 744 |
"clustername" = geneNameVar) %>% |
610 | 745 |
group_by(clustername) %>% |
611 | 746 |
mutate(representative = |
612 |
- paste_rmNA(unique(unlist(strsplit(representative, |
|
747 |
+ .paste_rmNA(unique(unlist(strsplit(representative, |
|
613 | 748 |
split = "\\|"))), |
614 | 749 |
sep = "|")) %>% |
615 | 750 |
ungroup() |
... | ... |
@@ -698,12 +833,12 @@ import2dDataset <- function(configTable, data, |
698 | 833 |
intensityStr = intensityStr, |
699 | 834 |
fcStr = fcStr) |
700 | 835 |
|
701 |
- dataRatioChecked <- checkRatioRef(dataLong, idVar = idVar, |
|
702 |
- concFactor = concFactor) |
|
836 |
+ dataRatioChecked <- .checkRatioRef(dataLong, idVar = idVar, |
|
837 |
+ concFactor = concFactor) |
|
703 | 838 |
|
704 | 839 |
if(medianNormalizeFC){ |
705 | 840 |
message("Median normalizing fold changes...") |
706 |
- dataNorm <- medianNormalizeRatios(dataRatioChecked) |
|
841 |
+ dataNorm <- .medianNormalizeRatios(dataRatioChecked) |
|
707 | 842 |
}else{ |
708 | 843 |
dataNorm <- dataRatioChecked |
709 | 844 |
} |
... | ... |
@@ -1,46 +1,48 @@ |
1 | 1 |
TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){ |
2 |
- # internal function copied from TPP package to avoid |
|
3 |
- # import of non-exported package functions |
|
4 |
- . <- NULL |
|
5 |
- isDF <- !is.null(dataframes) |
|
6 |
- isF <- !is.null(files) |
|
7 |
- isBoth <- isDF & isF |
|
8 |
- isNone <- !(isDF | isF) |
|
9 |
- if (isBoth) { |
|
10 |
- stop("Data import function received a filename AND a dataframe object. \n |
|
11 |
- Please specify only one.") |
|
12 |
- } |
|
13 |
- else if (isNone) { |
|
14 |
- stop("Data import function requires a filename or a dataframe object. \n |
|
15 |
- Please specify one.") |
|
16 |
- } |
|
17 |
- if (isDF) { |
|
18 |
- isClassList <- is.list(dataframes) && !is.data.frame(dataframes) |
|
19 |
- isClassDF <- is.data.frame(dataframes) |
|
20 |
- if (isClassList) { |
|
21 |
- classesInList <- dataframes %>% |
|
22 |
- vapply(. %>% inherits(., "data.frame"), TRUE) |
|
23 |
- if (!all(classesInList)) { |
|
24 |
- stop(paste("Argument 'dataframes' contains elements that are", |
|
25 |
- "not of type 'data.frame' at the following positions: "), |
|
26 |
- which(!classesInList) %>% paste(collapse = ", "), |
|
27 |
- ".") |
|
28 |
- } |
|
2 |
+ # internal function copied from TPP package to avoid |
|
3 |
+ # import of non-exported package functions |
|
4 |
+ . <- NULL |
|
5 |
+ isDF <- !is.null(dataframes) |
|
6 |
+ isF <- !is.null(files) |
|
7 |
+ isBoth <- isDF & isF |
|
8 |
+ isNone <- !(isDF | isF) |
|
9 |
+ if (isBoth) { |
|
10 |
+ stop("Data import function received a", |
|
11 |
+ " filename AND a dataframe object. \n", |
|
12 |
+ "Please specify only one.") |
|
29 | 13 |
} |
30 |
- else if (isClassDF) { |
|
31 |
- dataframes <- list(dataframes) |
|
32 |
- names(dataframes) <- expNames |
|
14 |
+ else if (isNone) { |
|
15 |
+ stop("Data import function requires a", |
|
16 |
+ " filename or a dataframe object. \n", |
|
17 |
+ "Please specify one.") |
|
33 | 18 |
} |
34 |
- else { |
|
35 |
- stop("Argument 'dataframes' must be either an object of class \n |
|
36 |
- 'data.frame', or a list of such objects.") |
|
19 |
+ if (isDF) { |
|
20 |
+ isClassList <- is.list(dataframes) && !is.data.frame(dataframes) |
|
21 |
+ isClassDF <- is.data.frame(dataframes) |
|
22 |
+ if (isClassList) { |
|
23 |
+ classesInList <- dataframes %>% |
|
24 |
+ vapply(. %>% inherits(., "data.frame"), TRUE) |
|
25 |
+ if (!all(classesInList)) { |
|
26 |
+ stop(paste("Argument 'dataframes' contains", |
|
27 |
+ "elements that are not of type", |
|
28 |
+ "'data.frame' at the following positions: "), |
|
29 |
+ which(!classesInList) %>% paste(collapse = ", "), ".") |
|
30 |
+ } |
|
31 |
+ } |
|
32 |
+ else if (isClassDF) { |
|
33 |
+ dataframes <- list(dataframes) |
|
34 |
+ names(dataframes) <- expNames |
|
35 |
+ } |
|
36 |
+ else { |
|
37 |
+ stop("Argument 'dataframes' must be either an object of class \n |
|
38 |
+ 'data.frame', or a list of such objects.") |
|
39 |
+ } |
|
37 | 40 |
} |
38 |
- } |
|
39 |
- if (isF) { |
|
40 |
- files <- as.character(files) |
|
41 |
- names(files) <- expNames |
|
42 |
- } |
|
43 |
- return(list(files = files, dataframes = dataframes)) |
|
41 |
+ if (isF) { |
|
42 |
+ files <- as.character(files) |
|
43 |
+ names(files) <- expNames |
|
44 |
+ } |
|
45 |
+ return(list(files = files, dataframes = dataframes)) |
|
44 | 46 |
} |
45 | 47 |
|
46 | 48 |
#' @importFrom utils read.delim |
... | ... |
@@ -95,7 +97,8 @@ TPP_importFct_removeDuplicates <- function(inDF, refColName, |
95 | 97 |
} |
96 | 98 |
retDF = rbind(retDF, tmpDF[useRow, ]) |
97 | 99 |
} |
98 |
- message(nrow(retDF), " out of ", nrow(inDF), " rows kept for further analysis.") |
|
100 |
+ message(nrow(retDF), " out of ", nrow(inDF), |
|
101 |
+ " rows kept for further analysis.") |
|
99 | 102 |
return(retDF) |
100 | 103 |
} |
101 | 104 |
|
... | ... |
@@ -128,14 +131,16 @@ TPP_importFct_checkExperimentCol <- function(expCol){ |
128 | 131 |
# internal function copied from TPP package to avoid |
129 | 132 |
# import of non-exported package functions |
130 | 133 |
if (is.null(expCol)) { |
131 |
- m <- "Config table needs an 'Experiment' column with unique experiment IDs." |
|
134 |
+ m <- paste("Config table needs an 'Experiment'", |
|
135 |
+ "column with unique experiment IDs.") |
|
132 | 136 |
stop(m, "\n") |
133 | 137 |
} |
134 | 138 |
oldExpNames <- expCol |
135 | 139 |
newExpNames <- gsub("([^[:alnum:]])", "_", expCol) |
136 | 140 |
iChanged <- oldExpNames != newExpNames |
137 | 141 |
if (any(iChanged)) { |
138 |
- m1 <- "Replaced non-alphanumeric characters in the 'Experiment' column entries:" |
|
142 |
+ m1 <- paste("Replaced non-alphanumeric characters", |
|
143 |
+ "in the 'Experiment' column entries:") |
|
139 | 144 |
m2 <- paste("'", paste(oldExpNames[iChanged], collapse = "', '"), |
140 | 145 |
"'\nby\n'", paste(newExpNames[iChanged], collapse = "', '"), |
141 | 146 |
sep = "") |
... | ... |
@@ -197,32 +202,42 @@ TPP_importFct_checkConditions <- function(condInfo, |
197 | 202 |
# import of non-exported package functions |
198 | 203 |
flagGenerateConds <- FALSE |
199 | 204 |
if (is.null(condInfo)) { |
200 |
- message("No information about experimental conditions given. Assigning NA instead.\n |
|
201 |
- Reminder: recognition of Vehicle and Treatment groups during pairwise \n |
|
202 |
- comparisons is only possible when they are specified in the config table.\n") |
|
205 |
+ message("No information about experimental conditions given.", |
|
206 |
+ "Assigning NA instead.\n", |
|
207 |
+ "Reminder: recognition of Vehicle and Treatment groups", |
|
208 |
+ "during pairwise \n", |
|
209 |
+ "comparisons is only possible when they are specified ", |
|
210 |
+ "in the config table.\n") |
|
203 | 211 |
condInfo <- rep(NA_character_, expectedLength) |
204 | 212 |
} |
205 | 213 |
else { |
206 |
- condInfo <- as.character(condInfo) %>% stringr::str_to_title() |
|
214 |
+ condInfo <- as.character(condInfo) %>% |
|
215 |
+ stringr::str_to_title() |
|
207 | 216 |
condLevels <- unique(condInfo) |
208 |
- invalidLevels = setdiff(condLevels, c("Treatment", "Vehicle")) |
|
217 |
+ invalidLevels = |
|
218 |
+ setdiff(condLevels, c("Treatment", "Vehicle")) |
|
209 | 219 |
if (length(invalidLevels) > 0) { |
210 | 220 |
stop("The entry '", invalidLevels, |
211 |
- paste("' in the condition column is invalid. Only the values 'Treatment' and", |
|
212 |
- "'Vehicle' are allowed. Please correct this and start again.")) |
|
221 |
+ paste("' in the condition column is invalid.", |
|
222 |
+ "Only the values 'Treatment' and", |
|
223 |
+ "'Vehicle' are allowed. Please correct", |
|
224 |
+ "this and start again.")) |
|
213 | 225 |
} |
214 | 226 |
} |
215 | 227 |
return(condInfo) |
216 | 228 |
} |
217 | 229 |
|
218 |
-TPP_checkFunctionArgs <- function(functionCall, expectedArguments){ |
|
230 |
+TPP_checkFunctionArgs <- |
|
231 |
+ function(functionCall, expectedArguments){ |
|
219 | 232 |
# internal function copied from TPP package to avoid |
220 | 233 |
# import of non-exported package functions |
221 | 234 |
myArgs <- names(functionCall) |
222 | 235 |
lapply(expectedArguments, function(arg) { |
223 | 236 |
if (!arg %in% myArgs) { |
224 |
- stop("Error in ", paste(functionCall)[1], ": argument '", |
|
225 |
- arg, "' is missing, with no default", call. = FALSE) |
|
237 |
+ stop("Error in ", paste(functionCall)[1], |
|
238 |
+ ": argument '", |
|
239 |
+ arg, "' is missing, with no default", |
|
240 |
+ call. = FALSE) |
|
226 | 241 |
} |
227 | 242 |
}) |
228 | 243 |
} |
... | ... |
@@ -244,11 +259,13 @@ TPP_nonLabelColumns <- function(){ |
244 | 259 |
return(out) |
245 | 260 |
} |
246 | 261 |
|
247 |
-TPP_detectLabelColumnsInConfigTable <- function(allColumns){ |
|
262 |
+TPP_detectLabelColumnsInConfigTable <- |
|
263 |
+ function(allColumns){ |
|
248 | 264 |
# internal function copied from TPP package to avoid |
249 | 265 |
# import of non-exported package functions |
250 | 266 |
TPP_checkFunctionArgs(match.call(), c("allColumns")) |
251 |
- noLabelCols <- TPP_nonLabelColumns()$column %>% as.character %>% |
|
267 |
+ noLabelCols <- TPP_nonLabelColumns()$column %>% |
|
268 |
+ as.character %>% |
|
252 | 269 |
unique |
253 | 270 |
compCols <- grep("comparison", allColumns, value = TRUE, |
254 | 271 |
ignore.case = TRUE) |
... | ... |
@@ -265,7 +282,8 @@ TPP_importCheckTemperatures <- function(temp){ |
265 | 282 |
naRows <- apply(is.na(tempMatrix), 1, all) |
266 | 283 |
if (any(naRows)) { |
267 | 284 |
stop("Row(s) ", paste(which(naRows), collapse = ", "), |
268 |
- " in the configuration table contain only missing temperature values.") |
|
285 |
+ " in the configuration table contain", |
|
286 |
+ " only missing temperature values.") |
|
269 | 287 |
} |
270 | 288 |
return(tempMatrix) |
271 | 289 |
} |
... | ... |
@@ -280,24 +298,28 @@ TPP_importFct_readConfigTable <- function(cfg){ |
280 | 298 |
strChunks <- strsplit(cfg, "\\.")[[1]] |
281 | 299 |
fileExtension <- strChunks[length(strChunks)] |
282 | 300 |
if (fileExtension == "txt") { |
283 |
- tab <- read.table(file = cfg, header = TRUE, |
|
284 |
- check.names = FALSE, stringsAsFactors = FALSE, |
|
285 |
- sep = "\t") |
|
301 |
+ tab <- read.table( |
|
302 |
+ file = cfg, header = TRUE, |
|
303 |
+ check.names = FALSE, stringsAsFactors = FALSE, |
|
304 |
+ sep = "\t") |
|
286 | 305 |
} |
287 | 306 |
else if (fileExtension == "csv") { |
288 |
- tab <- read.table(file = cfg, header = TRUE, |
|
289 |
- check.names = FALSE, stringsAsFactors = FALSE, |
|
290 |
- sep = ",") |
|
307 |
+ tab <- read.table( |
|
308 |
+ file = cfg, header = TRUE, |
|
309 |
+ check.names = FALSE, stringsAsFactors = FALSE, |
|
310 |
+ sep = ",") |
|
291 | 311 |
} |
292 | 312 |
else if (fileExtension == "xlsx") { |
293 | 313 |
tab <- openxlsx::read.xlsx(cfg) |
294 | 314 |
} |
295 | 315 |
else { |
296 |
- stop("Error during data import: ", cfg, " does not belong to a valid configuration file.") |
|
316 |
+ stop("Error during data import: ", cfg, |
|
317 |
+ " does not belong to a valid configuration file.") |
|
297 | 318 |
} |
298 | 319 |
} |
299 | 320 |
else { |
300 |
- stop("Error during data import: ", cfg, " does not belong to a valid configuration file.") |
|
321 |
+ stop("Error during data import: ", cfg, |
|
322 |
+ " does not belong to a valid configuration file.") |
|
301 | 323 |
} |
302 | 324 |
cfg <- tab |
303 | 325 |
} |
... | ... |
@@ -311,21 +333,25 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){ |
311 | 333 |
Experiment = Path = Compound <- NULL |
312 | 334 |
isValidDF <- FALSE |
313 | 335 |
if (is.data.frame(infoTable)) { |
314 |
- if ((ncol(infoTable) > 1) & ("Experiment" %in% colnames(infoTable))) { |
|
336 |
+ if ((ncol(infoTable) > 1) & |
|
337 |
+ ("Experiment" %in% colnames(infoTable))) { |
|
315 | 338 |
isValidDF <- TRUE |
316 | 339 |
} |
317 | 340 |
} |
318 | 341 |
if (!is.character(infoTable) & !isValidDF) { |
319 |
- stop("'infoTable' must either be a data frame with an 'Experiment' column \n |
|
320 |
- and at least one isobaric label column, or a filename pointing at a \n |
|
321 |
- table that fulfills the same criteria") |
|
342 |
+ stop("'infoTable' must either be a data frame", |
|
343 |
+ " with an 'Experiment' column \n", |
|
344 |
+ "and at least one isobaric label column,", |
|
345 |
+ "or a filename pointing at a \n", |
|
346 |
+ "table that fulfills the same criteria") |
|
322 | 347 |
} |
323 | 348 |
isValidType <- type %in% c("2D") |
324 | 349 |
if (!isValidType) { |
325 | 350 |
stop("'type' must have this value: '2D'") |
326 | 351 |
} |
327 | 352 |
infoTable <- TPP_importFct_readConfigTable(cfg = infoTable) |
328 |
- infoTable$Experiment <- TPP_importFct_checkExperimentCol(infoTable$Experiment) |
|
353 |
+ infoTable$Experiment <- |
|
354 |
+ TPP_importFct_checkExperimentCol(infoTable$Experiment) |
|
329 | 355 |
infoTable <- subset(infoTable, Experiment != "") |
330 | 356 |
givenPaths <- NULL |
331 | 357 |
if (any("Path" %in% colnames(infoTable))) { |
... | ... |
@@ -360,31 +386,38 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){ |
360 | 386 |
if (type == "2D") { |
361 | 387 |
temperatures <- infoTable$Temperature |
362 | 388 |
if (is.null(temperatures) | length(temperatures) < 2) { |
363 |
- m1 <- "Insufficient temperatures (<2) specified in config file." |
|
364 |
- m2 <- "Does your configuration table have the correct column names?" |
|
389 |
+ m1 <- paste("Insufficient temperatures (<2)", |
|
390 |
+ "specified in config file.") |
|
391 |
+ m2 <- paste("Does your configuration table", |
|
392 |
+ "have the correct column names?") |
|
365 | 393 |
stop(m1, "\n", m2) |
366 | 394 |
} |
367 | 395 |
else if (length(which(!infoTable$RefCol %in% labelColsNew)) != |
368 | 396 |
0) { |
369 |
- stop("Labels in reference column not found in any of teh label columns.") |
|
397 |
+ stop("Labels in reference column not found", |
|
398 |
+ "in any of teh label columns.") |
|
370 | 399 |
} |
371 | 400 |
hasCompoundCol <- any(allCols == "Compound") |
372 | 401 |
if (!hasCompoundCol) { |
373 |
- m <- "Config table of a 2D-TPP experiment needs a 'Compound' column." |
|
402 |
+ m <- paste("Config table of a 2D-TPP experiment", |
|
403 |
+ "needs a 'Compound' column.") |
|
374 | 404 |
stop(m, "\n") |
375 | 405 |
} |
376 | 406 |
else { |
377 |
- infoTable <- infoTable %>% mutate(Compound = gsub("([^[:alnum:]])", |
|
378 |
- "_", Compound)) |
|
407 |
+ infoTable <- infoTable %>% |
|
408 |
+ mutate(Compound = |
|
409 |
+ gsub("([^[:alnum:]])", "_", Compound)) |
|
379 | 410 |
} |
380 | 411 |
out <- infoTable |
381 | 412 |
} |
382 | 413 |
else { |
383 | 414 |
temperatures <- subset(infoTable, select = labelColsNew) |
384 | 415 |
tempMatrix <- TPP_importCheckTemperatures(temp = temperatures) |
385 |
- infoList <- list(expNames = as.character(infoTable$Experiment), |
|
386 |
- expCond = infoTable$Condition, files = givenPaths, |
|
387 |
- compStrs = compStrs, labels = labelColsNew, tempMatrix = tempMatrix) |
|
416 |
+ infoList <- list( |
|
417 |
+ expNames = as.character(infoTable$Experiment), |
|
418 |
+ expCond = infoTable$Condition, files = givenPaths, |
|
419 |
+ compStrs = compStrs, labels = labelColsNew, |
|
420 |
+ tempMatrix = tempMatrix) |
|
388 | 421 |
out <- infoList |
389 | 422 |
} |
390 | 423 |
return(out) |
... | ... |
@@ -504,9 +537,11 @@ annotateDataList <- function(dataList, geneNameVar, configLong, |
504 | 537 |
datLong <- dat %>% tbl_df() %>% |
505 | 538 |
gather(channel, signal, matches(intensityStr), matches(fcStr)) %>% |
506 | 539 |
mutate(label = gsub(fcStr, "", gsub(intensityStr, "", channel))) %>% |
507 |
- left_join(configLong %>% dplyr::select(Temperature, RefCol, label, conc), |
|
540 |
+ left_join(configLong %>% |
|
541 |
+ dplyr::select(Temperature, RefCol, label, conc), |
|
508 | 542 |
by = c("temperature" = "Temperature", "label")) %>% |
509 |
- mutate(spread_var = ifelse(grepl(fcStr, channel), "rel_value", "raw_value")) %>% |
|
543 |
+ mutate(spread_var = |
|
544 |
+ ifelse(grepl(fcStr, channel), "rel_value", "raw_value")) %>% |
|
510 | 545 |
dplyr::select(-channel, -unique_ID) %>% |
511 | 546 |
spread(spread_var, signal) |
512 | 547 |
})) |
... | ... |
@@ -520,11 +555,13 @@ filterOutContaminants <- function(dataLong){ |
520 | 555 |
} |
521 | 556 |
|
522 | 557 |
checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){ |
523 |
- # internal function to check that protein fold changes are computed |
|
558 |
+ # internal function to check that protein |
|
559 |
+ # fold changes are computed |
|
524 | 560 |
# relative to the correct TMT channel |
525 | 561 |
label <- RefCol <- rel_value <- raw_value <- conc <- NULL |
526 | 562 |
|
527 |
- if(!all(filter(dataLong, label == RefCol)$rel_value == 1, na.rm = TRUE)){ |
|
563 |
+ if(!all(filter(dataLong, label == RefCol)$rel_value == 1, |
|
564 |
+ na.rm = TRUE)){ |
|
528 | 565 |
message("Recomputing ratios!") |
529 | 566 |
dataOut <- dataLong %>% |
530 | 567 |
dplyr::group_by_(idVar, "temperature") %>% |
... | ... |
@@ -547,7 +584,8 @@ checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){ |
547 | 584 |
|
548 | 585 |
#' @importFrom stats median |
549 | 586 |
medianNormalizeRatios <- function(dataLong){ |
550 |
- # internal function to perform median normalization of ratios |
|
587 |
+ # internal function to perform median normalization |
|
588 |
+ # of ratios |
|
551 | 589 |
rel_value <- temperature <- conc <- |
552 | 590 |
raw_rel_value <- NULL |
553 | 591 |
|
... | ... |
@@ -562,7 +600,8 @@ medianNormalizeRatios <- function(dataLong){ |
562 | 600 |
} |
563 | 601 |
|
564 | 602 |
renameColumns <- function(dataLong, idVar, geneNameVar){ |
565 |
- # internal function to rename column names to match lazyeval variable |
|
603 |
+ # internal function to rename column names to |
|
604 |
+ # match lazyeval variable |
|
566 | 605 |
# names of main function |
567 | 606 |
clustername <- representative <- NULL |
568 | 607 |
|
... | ... |
@@ -570,7 +609,8 @@ renameColumns <- function(dataLong, idVar, geneNameVar){ |
570 | 609 |
"clustername" = geneNameVar) %>% |
571 | 610 |
group_by(clustername) %>% |
572 | 611 |
mutate(representative = |
573 |
- paste_rmNA(unique(unlist(strsplit(representative, split = "\\|"))), |
|
612 |
+ paste_rmNA(unique(unlist(strsplit(representative, |
|
613 |
+ split = "\\|"))), |
|
574 | 614 |
sep = "|")) %>% |
575 | 615 |
ungroup() |
576 | 616 |
} |
... | ... |
@@ -610,7 +650,8 @@ renameColumns <- function(dataLong, idVar, geneNameVar){ |
610 | 650 |
#' @examples |
611 | 651 |
#' data("config_tab") |
612 | 652 |
#' data("raw_dat_list") |
613 |
-#' import_df <- import2dDataset(configTable = config_tab, data = raw_dat_list, |
|
653 |
+#' import_df <- import2dDataset(configTable = config_tab, |
|
654 |
+#' data = raw_dat_list, |
|
614 | 655 |
#' idVar = "protein_id", |
615 | 656 |
#' intensityStr = "signal_sum_", |
616 | 657 |
#' fcStr = "rel_fc_", |
... | ... |
@@ -637,7 +678,8 @@ import2dDataset <- function(configTable, data, |
637 | 678 |
medianNormalizeFC = TRUE, |
638 | 679 |
filterContaminants = TRUE){ |
639 | 680 |
|
640 |
- configWide <- TPP_importCheckConfigTable(infoTable = configTable, type = "2D") |
|
681 |
+ configWide <- TPP_importCheckConfigTable( |
|
682 |
+ infoTable = configTable, type = "2D") |
|
641 | 683 |
configLong <- configWide2Long(configWide = configWide) |
642 | 684 |
|
643 | 685 |
dataList <- import2dMain(configTable = configWide, |
... | ... |
@@ -548,6 +548,9 @@ checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){ |
548 | 548 |
#' @importFrom stats median |
549 | 549 |
medianNormalizeRatios <- function(dataLong){ |
550 | 550 |
# internal function to perform median normalization of ratios |
551 |
+ rel_value <- temperature <- conc <- |
|
552 |
+ raw_rel_value <- NULL |
|
553 |
+ |
|
551 | 554 |
dataOut <- dataLong %>% |
552 | 555 |
rename(raw_rel_value = rel_value) %>% |
553 | 556 |
group_by(temperature, conc) %>% |
... | ... |
@@ -561,6 +564,8 @@ medianNormalizeRatios <- function(dataLong){ |
561 | 564 |
renameColumns <- function(dataLong, idVar, geneNameVar){ |
562 | 565 |
# internal function to rename column names to match lazyeval variable |
563 | 566 |
# names of main function |
567 |
+ clustername <- representative <- NULL |
|
568 |
+ |
|
564 | 569 |
dplyr::rename_(dataLong, "representative" = idVar, |
565 | 570 |
"clustername" = geneNameVar) %>% |
566 | 571 |
group_by(clustername) %>% |
... | ... |
@@ -1,6 +1,7 @@ |
1 | 1 |
TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){ |
2 | 2 |
# internal function copied from TPP package to avoid |
3 | 3 |
# import of non-exported package functions |
4 |
+ . <- NULL |
|
4 | 5 |
isDF <- !is.null(dataframes) |
5 | 6 |
isF <- !is.null(files) |
6 | 7 |
isBoth <- isDF & isF |
... | ... |
@@ -18,7 +19,7 @@ TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){ |
18 | 19 |
isClassDF <- is.data.frame(dataframes) |
19 | 20 |
if (isClassList) { |
20 | 21 |
classesInList <- dataframes %>% |
21 |
- sapply(. %>% inherits(., "data.frame")) |
|
22 |
+ vapply(. %>% inherits(., "data.frame"), TRUE) |
|
22 | 23 |
if (!all(classesInList)) { |
23 | 24 |
stop(paste("Argument 'dataframes' contains elements that are", |
24 | 25 |
"not of type 'data.frame' at the following positions: "), |
... | ... |
@@ -43,6 +44,7 @@ TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){ |
43 | 44 |
} |
44 | 45 |
|
45 | 46 |
#' @importFrom utils read.delim |
47 |
+#' @importFrom RCurl url.exists |
|
46 | 48 |
TPP_importFct_readFiles <- function (files, naStrs){ |
47 | 49 |
# internal function copied from TPP package to avoid |
48 | 50 |
# import of non-exported package functions |
... | ... |
@@ -74,7 +76,7 @@ TPP_importFct_removeDuplicates <- function(inDF, refColName, |
74 | 76 |
for (nU in nonUniques) { |
75 | 77 |
tmpDF = subset(inDF, get(refColName) == nU) |
76 | 78 |
nonNArows = NULL |
77 |
- for (r in 1:nrow(tmpDF)) { |
|
79 |
+ for (r in seq_len(nrow(tmpDF))) { |
|
78 | 80 |
if (any(!is.na(tmpDF[r, nonNAColNames]))) { |
79 | 81 |
nonNArows = c(nonNArows, r) |
80 | 82 |
} |
... | ... |
@@ -217,7 +219,7 @@ TPP_checkFunctionArgs <- function(functionCall, expectedArguments){ |
217 | 219 |
# internal function copied from TPP package to avoid |
218 | 220 |
# import of non-exported package functions |
219 | 221 |
myArgs <- names(functionCall) |
220 |
- sapply(expectedArguments, function(arg) { |
|
222 |
+ lapply(expectedArguments, function(arg) { |
|
221 | 223 |
if (!arg %in% myArgs) { |
222 | 224 |
stop("Error in ", paste(functionCall)[1], ": argument '", |
223 | 225 |
arg, "' is missing, with no default", call. = FALSE) |
... | ... |
@@ -335,23 +337,8 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){ |
335 | 337 |
givenPaths <- infoTable$Path |
336 | 338 |
} |
337 | 339 |
} |
338 |
- if (type == "TR") { |
|
339 |
- infoTable <- TPP_importFct_replaceReplicateColumn(cfg = infoTable) |
|
340 |
- } |
|
341 |
- if (type == "TR") { |
|
342 |
- compStrs <- TPP_importFct_checkComparisons(confgTable = infoTable) |
|
343 |
- } |
|
344 |
- else { |
|
345 |
- compStrs <- NA |
|
346 |
- } |
|
347 |
- if (type == "TR") { |
|
348 |
- infoTable$Condition <- TPP_importFct_checkConditions( |
|
349 |
- infoTable$Condition, |
|
350 |
- nrow(infoTable)) |
|
351 |
- } |
|
352 |
- else { |
|
353 |
- infoTable$Condition <- NULL |
|
354 |
- } |
|
340 |
+ compStrs <- NA |
|
341 |
+ infoTable$Condition <- NULL |
|
355 | 342 |
allCols <- colnames(infoTable) |
356 | 343 |
labelCols <- TPP_detectLabelColumnsInConfigTable(allColumns = allCols) |
357 | 344 |
labelValues <- infoTable[, labelCols] |
... | ... |
@@ -470,13 +457,13 @@ import2dMain <- function(configTable, data, idVar, fcStr, |
470 | 457 |
mutate(temperature = tTmp, experiment = expTmp, unique_ID = idsAnnotated) |
471 | 458 |
return(dataFinal) |
472 | 459 |
}) |
473 |
- newNames <- sapply(seq(nrow(configTable)), function(iTmp) { |
|
460 |
+ newNames <- vapply(seq(nrow(configTable)), function(iTmp) { |
|
474 | 461 |
rowTmp <- configTable[iTmp, ] |
475 | 462 |
tTmp <- rowTmp$Temperature |
476 | 463 |
expTmp <- rowTmp$Experiment |
477 | 464 |
newName <- paste(expTmp, tTmp, sep = "_") |
478 | 465 |
return(newName) |
479 |
- }) |
|
466 |
+ }, "") |
|
480 | 467 |
names(dataList) <- newNames |
481 | 468 |
out <- TPP_importFct_rmZeroSias(configTable = configTable, |
482 | 469 |
data.list = dataList, |
... | ... |
@@ -487,14 +474,20 @@ import2dMain <- function(configTable, data, idVar, fcStr, |
487 | 474 |
#' @importFrom tidyr gather |
488 | 475 |
configWide2Long <- function(configWide){ |
489 | 476 |
# internal function to tranform config table into long format |
477 |
+ |
|
478 |
+ Path <- label <- conc <- Compound <- Experiment <- |
|
479 |
+ Temperature <- RefCol <- NULL |
|
480 |
+ |
|
490 | 481 |
if(any(grepl("Path", colnames(configWide)))){ |
491 | 482 |
configLong <- configWide %>% |
492 | 483 |
dplyr::select(-Path) %>% |
493 |
- gather(label, conc, -Compound, -Experiment, -Temperature, -RefCol) %>% |
|
484 |
+ gather(label, conc, -Compound, |
|
485 |
+ -Experiment, -Temperature, -RefCol) %>% |
|
494 | 486 |
filter(conc != "-") |
495 | 487 |
}else{ |
496 | 488 |
configLong <- configWide %>% |
497 |
- gather(label, conc, -Compound, -Experiment, -Temperature, -RefCol) %>% |
|
489 |
+ gather(label, conc, -Compound, |
|
490 |
+ -Experiment, -Temperature, -RefCol) %>% |
|
498 | 491 |
filter(conc != "-") |
499 | 492 |
} |
500 | 493 |
} |
... | ... |
@@ -505,7 +498,7 @@ annotateDataList <- function(dataList, geneNameVar, configLong, |
505 | 498 |
# internal function to annotate list of 2D-TPP data subtables with |
506 | 499 |
# information from config table |
507 | 500 |
channel <- signal <- Temperature <- RefCol <- label <- |
508 |
- conc <- unique_ID <- NULL |
|
501 |
+ conc <- unique_ID <- spread_var <- NULL |
|
509 | 502 |
|
510 | 503 |
combinedTab <- bind_rows(lapply(dataList, function(dat){ |
511 | 504 |
datLong <- dat %>% tbl_df() %>% |
... | ... |
@@ -522,12 +515,15 @@ annotateDataList <- function(dataList, geneNameVar, configLong, |
522 | 515 |
|
523 | 516 |
filterOutContaminants <- function(dataLong){ |
524 | 517 |
# internal function to filter out contaminants |
518 |
+ representative <- NULL |
|
525 | 519 |
filter(dataLong, !grepl("##", representative)) |
526 | 520 |
} |
527 | 521 |
|
528 | 522 |
checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){ |
529 | 523 |
# internal function to check that protein fold changes are computed |
530 | 524 |
# relative to the correct TMT channel |
525 |
+ label <- RefCol <- rel_value <- raw_value <- conc <- NULL |
|
526 |
+ |
|
531 | 527 |
if(!all(filter(dataLong, label == RefCol)$rel_value == 1, na.rm = TRUE)){ |
532 | 528 |
message("Recomputing ratios!") |
533 | 529 |
dataOut <- dataLong %>% |
... | ... |
@@ -428,7 +428,7 @@ import2dMain <- function(configTable, data, idVar, fcStr, |
428 | 428 |
} |
429 | 429 |
configTable %>% group_by(Experiment, Compound, |
430 | 430 |
Temperature, RefCol) |
431 |
- iVec <- 1:nrow(configTable) |
|
431 |
+ iVec <- seq_len(nrow(configTable)) |
|
432 | 432 |
dataList <- lapply(iVec, function(iTmp) { |
433 | 433 |
rowTmp <- configTable[iTmp, ] |
434 | 434 |
expTmp <- rowTmp$Experiment |
... | ... |
@@ -606,6 +606,21 @@ renameColumns <- function(dataLong, idVar, geneNameVar){ |
606 | 606 |
#' |
607 | 607 |
#' @return tidy data frame representing a 2D-TPP dataset |
608 | 608 |
#' |
609 |
+#' @examples |
|
610 |
+#' data("config_tab") |
|
611 |
+#' data("raw_dat_list") |
|
612 |
+#' import_df <- import2dDataset(configTable = config_tab, data = raw_dat_list, |
|
613 |
+#' idVar = "protein_id", |
|
614 |
+#' intensityStr = "signal_sum_", |
|
615 |
+#' fcStr = "rel_fc_", |
|
616 |
+#' nonZeroCols = "qusm", |
|
617 |
+#' geneNameVar = "gene_name", |
|
618 |
+#' addCol = NULL, |
|
619 |
+#' qualColName = "qupm", |
|
620 |
+#' naStrs = c("NA", "n/d", "NaN"), |
|
621 |
+#' concFactor = 1e6, |
|
622 |
+#' medianNormalizeFC = TRUE, |
|
623 |
+#' filterContaminants = TRUE) |
|
609 | 624 |
#' |
610 | 625 |
#' @export |
611 | 626 |
import2dDataset <- function(configTable, data, |
... | ... |
@@ -614,7 +629,7 @@ import2dDataset <- function(configTable, data, |
614 | 629 |
fcStr = "rel_fc_protein_", |
615 | 630 |
nonZeroCols = "qssm", |
616 | 631 |
geneNameVar = "clustername", |
617 |
- addCol = "", |
|
632 |
+ addCol = NULL, |
|
618 | 633 |
qualColName = "qupm", |
619 | 634 |
naStrs = c("NA", "n/d", "NaN"), |
620 | 635 |
concFactor = 1e6, |
... | ... |
@@ -604,6 +604,9 @@ renameColumns <- function(dataLong, idVar, geneNameVar){ |
604 | 604 |
#' @param concFactor numeric value that indicates how concentrations need to |
605 | 605 |
#' be adjusted to yield total unit e.g. default mmol - 1e6 |
606 | 606 |
#' |
607 |
+#' @return tidy data frame representing a 2D-TPP dataset |
|
608 |
+#' |
|
609 |
+#' |
|
607 | 610 |
#' @export |
608 | 611 |
import2dDataset <- function(configTable, data, |
609 | 612 |
idVar = "representative", |
... | ... |
@@ -499,19 +499,23 @@ configWide2Long <- function(configWide){ |
499 | 499 |
} |
500 | 500 |
} |
501 | 501 |
|
502 |
+#' @importFrom tidyr spread |
|
502 | 503 |
annotateDataList <- function(dataList, geneNameVar, configLong, |
503 | 504 |
intensityStr, fcStr){ |
504 | 505 |
# internal function to annotate list of 2D-TPP data subtables with |
505 | 506 |
# information from config table |
507 |
+ channel <- signal <- Temperature <- RefCol <- label <- |
|
508 |
+ conc <- unique_ID <- NULL |
|
509 |
+ |
|
506 | 510 |
combinedTab <- bind_rows(lapply(dataList, function(dat){ |
507 | 511 |
datLong <- dat %>% tbl_df() %>% |
508 | 512 |
gather(channel, signal, matches(intensityStr), matches(fcStr)) %>% |
509 | 513 |
mutate(label = gsub(fcStr, "", gsub(intensityStr, "", channel))) %>% |
510 | 514 |
left_join(configLong %>% dplyr::select(Temperature, RefCol, label, conc), |
511 | 515 |
by = c("temperature" = "Temperature", "label")) %>% |
512 |
- mutate(var = ifelse(grepl(fcStr, channel), "rel_value", "raw_value")) %>% |
|
516 |
+ mutate(spread_var = ifelse(grepl(fcStr, channel), "rel_value", "raw_value")) %>% |
|
513 | 517 |
dplyr::select(-channel, -unique_ID) %>% |
514 |
- spread(var, signal) |
|
518 |
+ spread(spread_var, signal) |
|
515 | 519 |
})) |
516 | 520 |
return(combinedTab) |
517 | 521 |
} |
... | ... |
@@ -545,12 +549,14 @@ checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){ |
545 | 549 |
} |
546 | 550 |
} |
547 | 551 |
|
552 |
+#' @importFrom stats median |
|
548 | 553 |
medianNormalizeRatios <- function(dataLong){ |
549 | 554 |
# internal function to perform median normalization of ratios |
550 | 555 |
dataOut <- dataLong %>% |
551 | 556 |
rename(raw_rel_value = rel_value) %>% |
552 | 557 |
group_by(temperature, conc) %>% |
553 |
- mutate(rel_value = raw_rel_value / median(raw_rel_value, na.rm = TRUE)) %>% |
|
558 |
+ mutate(rel_value = raw_rel_value / |
|
559 |
+ median(raw_rel_value, na.rm = TRUE)) %>% |
|
554 | 560 |
ungroup() |
555 | 561 |
|
556 | 562 |
return(dataOut) |
... | ... |
@@ -1,4 +1,408 @@ |
1 |
-#' @import TPP |
|
1 |
+TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){ |
|
2 |
+ # internal function copied from TPP package to avoid |
|
3 |
+ # import of non-exported package functions |
|
4 |
+ isDF <- !is.null(dataframes) |
|
5 |
+ isF <- !is.null(files) |
|
6 |
+ isBoth <- isDF & isF |
|
7 |
+ isNone <- !(isDF | isF) |
|
8 |
+ if (isBoth) { |
|
9 |
+ stop("Data import function received a filename AND a dataframe object. \n |
|
10 |
+ Please specify only one.") |
|
11 |
+ } |
|
12 |
+ else if (isNone) { |
|
13 |
+ stop("Data import function requires a filename or a dataframe object. \n |
|
14 |
+ Please specify one.") |
|
15 |
+ } |
|
16 |
+ if (isDF) { |
|
17 |
+ isClassList <- is.list(dataframes) && !is.data.frame(dataframes) |
|
18 |
+ isClassDF <- is.data.frame(dataframes) |
|
19 |
+ if (isClassList) { |
|
20 |
+ classesInList <- dataframes %>% |
|
21 |
+ sapply(. %>% inherits(., "data.frame")) |
|
22 |
+ if (!all(classesInList)) { |
|
23 |
+ stop(paste("Argument 'dataframes' contains elements that are", |
|
24 |
+ "not of type 'data.frame' at the following positions: "), |
|
25 |
+ which(!classesInList) %>% paste(collapse = ", "), |
|
26 |
+ ".") |
|
27 |
+ } |
|
28 |
+ } |
|
29 |
+ else if (isClassDF) { |
|
30 |
+ dataframes <- list(dataframes) |
|
31 |
+ names(dataframes) <- expNames |
|
32 |
+ } |
|
33 |
+ else { |
|
34 |
+ stop("Argument 'dataframes' must be either an object of class \n |
|
35 |
+ 'data.frame', or a list of such objects.") |
|
36 |
+ } |
|
37 |
+ } |
|
38 |
+ if (isF) { |
|
39 |
+ files <- as.character(files) |
|
40 |
+ names(files) <- expNames |
|
41 |
+ } |
|
42 |
+ return(list(files = files, dataframes = dataframes)) |
|
43 |
+} |
|
44 |
+ |
|
45 |
+#' @importFrom utils read.delim |
|
46 |
+TPP_importFct_readFiles <- function (files, naStrs){ |
|
47 |
+ # internal function copied from TPP package to avoid |
|
48 |
+ # import of non-exported package functions |
|
49 |
+ expNames <- names(files) |
|
50 |
+ data <- vector("list", length(files)) |
|
51 |
+ names(data) <- expNames |
|
52 |
+ for (expName in expNames) { |
|
53 |
+ fTmp <- files[[expName]] |
|
54 |
+ if (file.exists(fTmp) || url.exists(fTmp)) { |
|
55 |
+ data[[expName]] <- read.delim(fTmp, as.is = TRUE, |
|
56 |
+ na.strings = naStrs, quote = "") |
|
57 |
+ } |
|
58 |
+ else { |
|
59 |
+ stop("File ", fTmp, " could not be found.") |
|
60 |
+ } |
|
61 |
+ } |
|
62 |
+ return(data) |
|
63 |
+} |
|
64 |
+ |
|
65 |
+TPP_importFct_removeDuplicates <- function(inDF, refColName, |
|
66 |
+ nonNAColNames, qualColName){ |
|
67 |
+ # internal function copied from TPP package to avoid |
|
68 |
+ # import of non-exported package functions |
|
69 |
+ message("Removing duplicate identifiers using quality column '", |
|
70 |
+ qualColName, "'...") |
|
71 |
+ nonUniques = unique(inDF[duplicated(inDF[[refColName]]), |
|
72 |
+ refColName]) |
|
73 |
+ retDF = subset(inDF, !(get(refColName) %in% nonUniques)) |
|
74 |
+ for (nU in nonUniques) { |
|
75 |
+ tmpDF = subset(inDF, get(refColName) == nU) |
|
76 |
+ nonNArows = NULL |
|
77 |
+ for (r in 1:nrow(tmpDF)) { |
|
78 |
+ if (any(!is.na(tmpDF[r, nonNAColNames]))) { |
|
79 |
+ nonNArows = c(nonNArows, r) |
|
80 |
+ } |
|
81 |
+ } |
|
82 |
+ if (length(nonNArows) > 1) { |
|
83 |
+ if (is.null(qualColName)) { |
|
84 |
+ useRow = 1 |
|
85 |
+ } |
|
86 |
+ else { |
|
87 |
+ qualVals = tmpDF[nonNArows, qualColName] |
|
88 |
+ useRow = match(max(qualVals), qualVals) |
|
89 |
+ } |
|
90 |
+ } |
|
91 |
+ else { |
|
92 |
+ useRow = nonNArows[1] |
|
93 |
+ } |
|
94 |
+ retDF = rbind(retDF, tmpDF[useRow, ]) |
|
95 |
+ } |
|
96 |
+ message(nrow(retDF), " out of ", nrow(inDF), " rows kept for further analysis.") |
|
97 |
+ return(retDF) |
|
98 |
+} |
|
99 |
+ |
|
100 |
+TPP_replaceZeros <- function(x){ |
|
101 |
+ # internal function copied from TPP package to avoid |
|
102 |
+ # import of non-exported package functions |
|
103 |
+ x[which(x == 0)] <- NA |
|
104 |
+ return(x) |
|
105 |
+} |
|
106 |
+ |
|
107 |
+TPP_importFct_rmZeroSias <- function(configTable, data.list, |
|
108 |
+ intensityStr){ |
|
109 |
+ # internal function copied from TPP package to avoid |
|
110 |
+ # import of non-exported package functions |
|
111 |
+ out <- lapply(names(data.list), function(l.name) { |
|
112 |
+ datTmp <- data.list[[l.name]] |
|
113 |
+ colsTmp <- colnames(datTmp) |
|
114 |
+ intensity.cols <- grep(intensityStr, colsTmp, value = TRUE) |
|
115 |
+ intensity.df <- subset(datTmp, select = intensity.cols) %>% |
|
116 |
+ mutate_all(as.character) %>% mutate_all(as.numeric) |
|
117 |
+ new.intensity.df <- intensity.df %>% mutate_all(TPP_replaceZeros) |
|
118 |
+ datTmp[, intensity.cols] <- new.intensity.df |
|
119 |
+ return(datTmp) |
|
120 |
+ }) |
|
121 |
+ names(out) <- names(data.list) |
|
122 |
+ return(out) |
|
123 |
+} |
|
124 |
+ |
|
125 |
+TPP_importFct_checkExperimentCol <- function(expCol){ |
|
126 |
+ # internal function copied from TPP package to avoid |
|
127 |
+ # import of non-exported package functions |
|
128 |
+ if (is.null(expCol)) { |
|
129 |
+ m <- "Config table needs an 'Experiment' column with unique experiment IDs." |
|
130 |
+ stop(m, "\n") |
|
131 |
+ } |
|
132 |
+ oldExpNames <- expCol |
|
133 |
+ newExpNames <- gsub("([^[:alnum:]])", "_", expCol) |
|
134 |
+ iChanged <- oldExpNames != newExpNames |
|
135 |
+ if (any(iChanged)) { |
|
136 |
+ m1 <- "Replaced non-alphanumeric characters in the 'Experiment' column entries:" |
|
137 |
+ m2 <- paste("'", paste(oldExpNames[iChanged], collapse = "', '"), |
|
138 |
+ "'\nby\n'", paste(newExpNames[iChanged], collapse = "', '"), |
|
139 |
+ sep = "") |
|
140 |
+ message(m1, "\n", m2, "\n") |
|
141 |
+ } |
|
142 |
+ return(newExpNames) |
|
143 |
+} |
|
144 |
+ |
|
145 |
+TPP_importFct_checkComparisons <- function(confgTable){ |
|
146 |
+ # internal function copied from TPP package to avoid |
|
147 |
+ # import of non-exported package functions |
|
148 |
+ expConds <- confgTable$Condition |
|
149 |
+ expNames <- confgTable$Experiment |
|
150 |
+ compCols <- grep("Comparison", colnames(confgTable), ignore.case = TRUE, |
|
151 |
+ value = TRUE) |
|
152 |
+ compChars <- apply(confgTable[compCols], 2, function(x) { |
|
153 |
+ length(grep("[[:alnum:]]", x, value = TRUE)) |
|
154 |
+ }) |
|
155 |
+ comp_unequal_two <- compChars != 2 |
|
156 |
+ if (any(comp_unequal_two)) { |
|
157 |
+ warning(paste("\nThe following comparison columns could not be evaluated", |
|
158 |
+ "because they did not contain exactly two entries:\n\t\t"), |
|
159 |
+ paste(compCols[comp_unequal_two], collapse = ",\n\t\t")) |
|
160 |
+ } |
|
161 |
+ validCompCols <- compCols[!comp_unequal_two] |
|
162 |
+ allCompStrs <- c() |
|
163 |
+ if (length(validCompCols) > 0) { |
|
164 |
+ message("Comparisons will be performed between the following experiments:") |
|
165 |
+ for (colName in validCompCols) { |
|
166 |
+ current_compEntries <- confgTable[[colName]] |
|
167 |
+ current_compRows <- grep("[[:alnum:]]", current_compEntries) |
|