Browse code

updated .checkRatioRef function to omit use of

Nils Kurzawa authored on 28/08/2020 09:02:47
Showing1 changed files
... ...
@@ -409,8 +409,8 @@ TPP_importCheckConfigTable <- function(infoTable, type = "2D"){
409 409
     }
410 410
     else if (length(which(!infoTable$RefCol %in% labelColsNew)) != 
411 411
              0) {
412
-      stop("Labels in reference column not found", 
413
-           " in any of the label columns.")
412
+      stop(paste("Labels in reference column not found", 
413
+                 "in any of the label columns."))
414 414
     }
415 415
     hasCompoundCol <- any(allCols == "Compound")
416 416
     if (!hasCompoundCol) {
... ...
@@ -669,7 +669,7 @@ filterOutContaminants <- function(dataLong){
669 669
           na.rm = TRUE)){
670 670
     message("Recomputing ratios!")
671 671
     dataOut <- dataLong %>%
672
-      dplyr::group_by_(idVar, "temperature") %>%
672
+      group_by(.dots = c(idVar, "temperature")) %>%
673 673
       mutate(rel_value = rel_value/rel_value[label == RefCol]) %>%
674 674
       ungroup %>%
675 675
       filter(!is.na(raw_value)) %>%
Browse code

fixed typo in check config table function

Nils Kurzawa authored on 28/08/2020 08:08:16
Showing1 changed files
... ...
@@ -410,7 +410,7 @@ TPP_importCheckConfigTable <- function(infoTable, type = "2D"){
410 410
     else if (length(which(!infoTable$RefCol %in% labelColsNew)) != 
411 411
              0) {
412 412
       stop("Labels in reference column not found", 
413
-           "in any of teh label columns.")
413
+           " in any of the label columns.")
414 414
     }
415 415
     hasCompoundCol <- any(allCols == "Compound")
416 416
     if (!hasCompoundCol) {
Browse code

added plotting function for heatmap visualization

Nils Kurzawa authored on 17/06/2020 14:59:50
Showing1 changed files
... ...
@@ -627,7 +627,7 @@ annotateDataList <- function(dataList, geneNameVar, configLong,
627 627
     conc <- unique_ID <- spread_var <- NULL
628 628
   
629 629
   combinedTab <- bind_rows(lapply(dataList, function(dat){
630
-    datLong <- dat %>% tbl_df() %>%
630
+    datLong <- dat %>% as_tibble() %>%
631 631
       gather(channel, signal, matches(intensityStr), matches(fcStr)) %>%
632 632
       mutate(label = gsub(fcStr, "", gsub(intensityStr, "", channel))) %>%
633 633
       left_join(configLong %>% 
Browse code

added test for new function to obtain h0 and h1 model parameters and removed deprecated 'rename_' function usage in import function

Nils Kurzawa authored on 15/01/2020 13:07:44
Showing1 changed files
... ...
@@ -741,8 +741,8 @@ filterOutContaminants <- function(dataLong){
741 741
 renameColumns <- function(dataLong, idVar, geneNameVar){
742 742
   clustername <- representative <- NULL
743 743
   
744
-  dplyr::rename_(dataLong, "representative" = idVar, 
745
-                 "clustername" = geneNameVar) %>%
744
+  dplyr::rename(dataLong, "representative" = idVar, 
745
+                "clustername" = geneNameVar) %>%
746 746
     group_by(clustername) %>%
747 747
     mutate(representative =
748 748
              .paste_rmNA(unique(unlist(strsplit(representative, 
Browse code

forced data frame to be tibble in remove duplicates import function to guarantee functionality

Nils Kurzawa authored on 29/04/2019 05:49:43
Showing1 changed files
... ...
@@ -66,13 +66,14 @@
66 66
   return(data)
67 67
 }
68 68
 
69
+#' @import dplyr
69 70
 .TPP_importFct_removeDuplicates <- function(inDF, refColName, 
70 71
                                            nonNAColNames, qualColName){
71 72
   # internal function copied from TPP package to avoid 
72 73
   # import of non-exported package functions
73 74
   message("Removing duplicate identifiers using quality column '", 
74 75
           qualColName, "'...")
75
-  nonUniques <- unique(inDF[duplicated(inDF[[refColName]]), 
76
+  nonUniques <- unique(as_tibble(inDF)[duplicated(inDF[[refColName]]), 
76 77
                            refColName])
77 78
   retDF <- subset(inDF, !(get(refColName) %in% nonUniques))
78 79
   if(nrow(nonUniques)){
Browse code

adressed review comments as well as possibe

Nils Kurzawa authored on 15/04/2019 08:31:14
Showing1 changed files
... ...
@@ -1,4 +1,4 @@
1
-TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){
1
+.TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){
2 2
     # internal function copied from TPP package to avoid 
3 3
     # import of non-exported package functions
4 4
     . <- NULL
... ...
@@ -47,7 +47,7 @@ TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){
47 47
 
48 48
 #' @importFrom utils read.delim
49 49
 #' @importFrom RCurl url.exists
50
-TPP_importFct_readFiles <- function (files, naStrs){
50
+.TPP_importFct_readFiles <- function (files, naStrs){
51 51
   # internal function copied from TPP package to avoid 
52 52
   # import of non-exported package functions
53 53
   expNames <- names(files)
... ...
@@ -66,51 +66,53 @@ TPP_importFct_readFiles <- function (files, naStrs){
66 66
   return(data)
67 67
 }
68 68
 
69
-TPP_importFct_removeDuplicates <- function(inDF, refColName, 
69
+.TPP_importFct_removeDuplicates <- function(inDF, refColName, 
70 70
                                            nonNAColNames, qualColName){
71 71
   # internal function copied from TPP package to avoid 
72 72
   # import of non-exported package functions
73 73
   message("Removing duplicate identifiers using quality column '", 
74 74
           qualColName, "'...")
75
-  nonUniques = unique(inDF[duplicated(inDF[[refColName]]), 
75
+  nonUniques <- unique(inDF[duplicated(inDF[[refColName]]), 
76 76
                            refColName])
77
-  retDF = subset(inDF, !(get(refColName) %in% nonUniques))
78
-  for (nU in nonUniques) {
79
-    tmpDF = subset(inDF, get(refColName) == nU)
80
-    nonNArows = NULL
81
-    for (r in seq_len(nrow(tmpDF))) {
82
-      if (any(!is.na(tmpDF[r, nonNAColNames]))) {
83
-        nonNArows = c(nonNArows, r)
84
-      }
85
-    }
86
-    if (length(nonNArows) > 1) {
87
-      if (is.null(qualColName)) {
88
-        useRow = 1
89
-      }
90
-      else {
91
-        qualVals = tmpDF[nonNArows, qualColName]
92
-        useRow = match(max(qualVals), qualVals)
77
+  retDF <- subset(inDF, !(get(refColName) %in% nonUniques))
78
+  if(nrow(nonUniques)){
79
+      for (nU in nonUniques) {
80
+          tmpDF <- subset(inDF, get(refColName) == nU)
81
+          nonNArows <- NULL
82
+          for (r in seq_len(nrow(tmpDF))) {
83
+              if (any(!is.na(tmpDF[r, nonNAColNames]))) {
84
+                  nonNArows <- c(nonNArows, r)
85
+              }
86
+          }
87
+          if (length(nonNArows) > 1) {
88
+              if (is.null(qualColName)) {
89
+                  useRow <- 1
90
+              }
91
+              else {
92
+                  qualVals <- tmpDF[nonNArows, qualColName]
93
+                  useRow <- match(max(qualVals), qualVals)
94
+              }
95
+          }
96
+          else {
97
+              useRow <- nonNArows[1]
98
+          }
99
+          retDF <- rbind(retDF, tmpDF[useRow, ])
93 100
       }
94
-    }
95
-    else {
96
-      useRow = nonNArows[1]
97
-    }
98
-    retDF = rbind(retDF, tmpDF[useRow, ])
99 101
   }
100 102
   message(nrow(retDF), " out of ", nrow(inDF), 
101 103
           " rows kept for further analysis.")
102 104
   return(retDF)
103 105
 }
104 106
 
105
-TPP_replaceZeros <- function(x){
107
+.TPP_replaceZeros <- function(x){
106 108
   # internal function copied from TPP package to avoid 
107 109
   # import of non-exported package functions
108 110
   x[which(x == 0)] <- NA
109 111
   return(x)
110 112
 }
111 113
 
112
-TPP_importFct_rmZeroSias <- function(configTable, data.list, 
113
-                                     intensityStr){
114
+.TPP_importFct_rmZeroSias <- function(data.list, 
115
+                                      intensityStr){
114 116
   # internal function copied from TPP package to avoid 
115 117
   # import of non-exported package functions
116 118
   out <- lapply(names(data.list), function(l.name) {
... ...
@@ -119,7 +121,7 @@ TPP_importFct_rmZeroSias <- function(configTable, data.list,
119 121
     intensity.cols <- grep(intensityStr, colsTmp, value = TRUE)
120 122
     intensity.df <- subset(datTmp, select = intensity.cols) %>% 
121 123
       mutate_all(as.character) %>% mutate_all(as.numeric)
122
-    new.intensity.df <- intensity.df %>% mutate_all(TPP_replaceZeros)
124
+    new.intensity.df <- intensity.df %>% mutate_all(.TPP_replaceZeros)
123 125
     datTmp[, intensity.cols] <- new.intensity.df
124 126
     return(datTmp)
125 127
   })
... ...
@@ -127,7 +129,7 @@ TPP_importFct_rmZeroSias <- function(configTable, data.list,
127 129
   return(out)
128 130
 }
129 131
 
130
-TPP_importFct_checkExperimentCol <- function(expCol){
132
+.TPP_importFct_checkExperimentCol <- function(expCol){
131 133
   # internal function copied from TPP package to avoid 
132 134
   # import of non-exported package functions
133 135
   if (is.null(expCol)) {
... ...
@@ -149,7 +151,7 @@ TPP_importFct_checkExperimentCol <- function(expCol){
149 151
   return(newExpNames)
150 152
 }
151 153
 
152
-TPP_importFct_checkComparisons <- function(confgTable){
154
+.TPP_importFct_checkComparisons <- function(confgTable){
153 155
   # internal function copied from TPP package to avoid 
154 156
   # import of non-exported package functions
155 157
   expConds <- confgTable$Condition
... ...
@@ -167,7 +169,7 @@ TPP_importFct_checkComparisons <- function(confgTable){
167 169
   }
168 170
   validCompCols <- compCols[!comp_unequal_two]
169 171
   allCompStrs <- c()
170
-  if (length(validCompCols) > 0) {
172
+  if (length(validCompCols)) {
171 173
     message("Comparisons will be performed between the following experiments:")
172 174
     for (colName in validCompCols) {
173 175
       current_compEntries <- confgTable[[colName]]
... ...
@@ -196,7 +198,7 @@ TPP_importFct_checkComparisons <- function(confgTable){
196 198
 }
197 199
 
198 200
 #' @importFrom stringr str_to_title
199
-TPP_importFct_checkConditions <- function(condInfo, 
201
+.TPP_importFct_checkConditions <- function(condInfo, 
200 202
                                           expectedLength){
201 203
   # internal function copied from TPP package to avoid 
202 204
   # import of non-exported package functions
... ...
@@ -227,7 +229,7 @@ TPP_importFct_checkConditions <- function(condInfo,
227 229
   return(condInfo)
228 230
 }
229 231
 
230
-TPP_checkFunctionArgs <- 
232
+.TPP_checkFunctionArgs <- 
231 233
   function(functionCall, expectedArguments){
232 234
   # internal function copied from TPP package to avoid 
233 235
   # import of non-exported package functions
... ...
@@ -242,7 +244,7 @@ TPP_checkFunctionArgs <-
242 244
   })
243 245
 }
244 246
 
245
-TPP_nonLabelColumns <- function(){
247
+.TPP_nonLabelColumns <- function(){
246 248
   # internal function copied from TPP package to avoid 
247 249
   # import of non-exported package functions
248 250
   out <- data.frame(
... ...
@@ -259,12 +261,12 @@ TPP_nonLabelColumns <- function(){
259 261
   return(out)
260 262
 }
261 263
 
262
-TPP_detectLabelColumnsInConfigTable <- 
264
+.TPP_detectLabelColumnsInConfigTable <- 
263 265
   function(allColumns){
264 266
   # internal function copied from TPP package to avoid 
265 267
   # import of non-exported package functions
266
-  TPP_checkFunctionArgs(match.call(), c("allColumns"))
267
-  noLabelCols <- TPP_nonLabelColumns()$column %>% 
268
+  .TPP_checkFunctionArgs(match.call(), c("allColumns"))
269
+  noLabelCols <- .TPP_nonLabelColumns()$column %>% 
268 270
     as.character %>% 
269 271
     unique
270 272
   compCols <- grep("comparison", allColumns, value = TRUE, 
... ...
@@ -274,7 +276,7 @@ TPP_detectLabelColumnsInConfigTable <-
274 276
   return(labelCols)
275 277
 }
276 278
 
277
-TPP_importCheckTemperatures <- function(temp){
279
+.TPP_importCheckTemperatures <- function(temp){
278 280
   # internal function copied from TPP package to avoid 
279 281
   # import of non-exported package functions
280 282
   tempMatrix <- as.matrix(temp)
... ...
@@ -290,7 +292,7 @@ TPP_importCheckTemperatures <- function(temp){
290 292
 
291 293
 #' @importFrom openxlsx read.xlsx
292 294
 #' @importFrom utils read.table
293
-TPP_importFct_readConfigTable <- function(cfg){
295
+.TPP_importFct_readConfigTable <- function(cfg){
294 296
   # internal function copied from TPP package to avoid 
295 297
   # import of non-exported package functions
296 298
   if (is.character(cfg)) {
... ...
@@ -326,11 +328,23 @@ TPP_importFct_readConfigTable <- function(cfg){
326 328
   return(cfg)
327 329
 }
328 330
 
329
-TPP_importCheckConfigTable <- function (infoTable, type = "2D"){
330
-  # internal function copied from TPP package to avoid 
331
-  # import of non-exported package functions
332
-  TPP_checkFunctionArgs(match.call(), c("infoTable", "type"))
333
-  Experiment = Path = Compound <- NULL
331
+#' Import and chech configuration table
332
+#' 
333
+#' @param infoTable character string of a file path to
334
+#' a config table (excel,txt or csv file) or data frame
335
+#' containing a config table
336
+#' @param type charater string indicating dataset type
337
+#' default is 2D
338
+#' 
339
+#' @return data frame with config table
340
+#' 
341
+#' @examples 
342
+#' data("config_tab")
343
+#' TPP_importCheckConfigTable(config_tab, type = "2D")
344
+#' @export
345
+TPP_importCheckConfigTable <- function(infoTable, type = "2D"){
346
+  .TPP_checkFunctionArgs(match.call(), c("infoTable", "type"))
347
+  Experiment <- Path <- Compound <- NULL
334 348
   isValidDF <- FALSE
335 349
   if (is.data.frame(infoTable)) {
336 350
     if ((ncol(infoTable) > 1) & 
... ...
@@ -349,9 +363,9 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){
349 363
   if (!isValidType) {
350 364
     stop("'type' must have this value: '2D'")
351 365
   }
352
-  infoTable <- TPP_importFct_readConfigTable(cfg = infoTable)
366
+  infoTable <- .TPP_importFct_readConfigTable(cfg = infoTable)
353 367
   infoTable$Experiment <- 
354
-    TPP_importFct_checkExperimentCol(infoTable$Experiment)
368
+    .TPP_importFct_checkExperimentCol(infoTable$Experiment)
355 369
   infoTable <- subset(infoTable, Experiment != "")
356 370
   givenPaths <- NULL
357 371
   if (any("Path" %in% colnames(infoTable))) {
... ...
@@ -366,7 +380,7 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){
366 380
   compStrs <- NA
367 381
   infoTable$Condition <- NULL
368 382
   allCols <- colnames(infoTable)
369
-  labelCols <- TPP_detectLabelColumnsInConfigTable(allColumns = allCols)
383
+  labelCols <- .TPP_detectLabelColumnsInConfigTable(allColumns = allCols)
370 384
   labelValues <- infoTable[, labelCols]
371 385
   labelValuesNum <- suppressWarnings(labelValues %>% apply(2, 
372 386
                                                            as.numeric))
... ...
@@ -412,7 +426,7 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){
412 426
   }
413 427
   else {
414 428
     temperatures <- subset(infoTable, select = labelColsNew)
415
-    tempMatrix <- TPP_importCheckTemperatures(temp = temperatures)
429
+    tempMatrix <- .TPP_importCheckTemperatures(temp = temperatures)
416 430
     infoList <- list(
417 431
       expNames = as.character(infoTable$Experiment), 
418 432
       expCond = infoTable$Condition, files = givenPaths, 
... ...
@@ -423,6 +437,45 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){
423 437
   return(out)
424 438
 }
425 439
 
440
+#' Import 2D-TPP dataset main function
441
+#' 
442
+#' @param configTable character string of a file path to a config table
443
+#' @param data possible list of datasets from different MS runs 
444
+#' corresponding to a 2D-TPP dataset, circumvents loading datasets 
445
+#' referencend in config table, default is NULL
446
+#' @param idVar character string indicating which data column provides the 
447
+#' unique identifiers for each protein.
448
+#' @param intensityStr character string indicating which columns contain 
449
+#' raw intensities measurements
450
+#' @param fcStr character string indicating which columns contain the actual 
451
+#' fold change values. Those column names containing the suffix \code{fcStr} 
452
+#' will be regarded as containing fold change values.
453
+#' @param naStrs character vector indicating missing values in the data table. 
454
+#' When reading data from file, this value will be passed on to the argument 
455
+#' \code{na.strings} in function \code{read.delim}.
456
+#' @param addCol character string indicating additional column to import
457
+#' @param nonZeroCols column like default qssm that should be imported and
458
+#' requested to be non-zero in analyzed data
459
+#' @param qualColName character string indicating which column can be used for 
460
+#' additional quality criteria when deciding between different non-unique 
461
+#' protein identifiers.
462
+#' 
463
+#' @return list of data frames containing different
464
+#' datasets
465
+#' 
466
+#' @examples 
467
+#' data("config_tab")
468
+#' data("raw_dat_list")
469
+#' dataList <- import2dMain(configTable = config_tab,
470
+#'                          data = raw_dat_list,
471
+#'                          idVar = "protein_id",
472
+#'                          fcStr = "rel_fc_",
473
+#'                          addCol = "gene_name",
474
+#'                          naStrs = NA,
475
+#'                          intensityStr = "signal_sum_",
476
+#'                          nonZeroCols = "qusm",
477
+#'                          qualColName = "qupm")
478
+#' @export
426 479
 import2dMain <- function(configTable, data, idVar, fcStr,
427 480
                          addCol, naStrs, intensityStr,
428 481
                          qualColName, nonZeroCols){
... ...
@@ -434,20 +487,18 @@ import2dMain <- function(configTable, data, idVar, fcStr,
434 487
       files <- NULL
435 488
     }
436 489
   }
437
-  Experiment = Compound = Temperature = RefCol <- NULL
490
+  Experiment <- Compound <- Temperature <- RefCol <- NULL
438 491
   expNames <- configTable$Experiment
439
-  argList <- TPP_importFct_CheckDataFormat(dataframes = data, 
492
+  argList <- .TPP_importFct_CheckDataFormat(dataframes = data, 
440 493
                                            files = files,
441 494
                                            expNames = expNames)
442 495
   data <- argList[["dataframes"]]
443 496
   files <- argList[["files"]]
444 497
   if (!is.null(files)) {
445 498
     files2 <- files[!duplicated(names(files))]
446
-    data <- TPP_importFct_readFiles(files = files2, 
499
+    data <- .TPP_importFct_readFiles(files = files2, 
447 500
                                     naStrs = naStrs)
448 501
   }
449
-  configTable %>% group_by(Experiment, Compound,
450
-                           Temperature, RefCol)
451 502
   iVec <- seq_len(nrow(configTable))
452 503
   dataList <- lapply(iVec, function(iTmp) {
453 504
     rowTmp <- configTable[iTmp, ]
... ...
@@ -459,7 +510,7 @@ import2dMain <- function(configTable, data, idVar, fcStr,
459 510
                   "RefCol", "Path", "Condition")
460 511
     allCols <- colnames(rowTmp)
461 512
     labelCols <- setdiff(allCols, noFCCols)
462
-    labelValues <- suppressMessages(rowTmp[, labelCols] %>%
513
+    labelValues <- suppressWarnings(rowTmp[, labelCols] %>%
463 514
                                       as.numeric)
464 515
     labelColsNum <- labelCols[!is.na(labelValues)]
465 516
     signalCols <- paste(intensityStr, labelColsNum, sep = "")
... ...
@@ -480,7 +531,7 @@ import2dMain <- function(configTable, data, idVar, fcStr,
480 531
            notFound, paste("'. Please check the suffices and the", 
481 532
                            "additional column names you have specified."))
482 533
     }
483
-    dataFiltered <- TPP_importFct_removeDuplicates(
534
+    dataFiltered <- .TPP_importFct_removeDuplicates(
484 535
       inDF = dataTmp,refColName = idVar, 
485 536
       nonNAColNames = dataCols, 
486 537
       qualColName = qualColName[1])
... ...
@@ -498,17 +549,24 @@ import2dMain <- function(configTable, data, idVar, fcStr,
498 549
     return(newName)
499 550
   }, "")
500 551
   names(dataList) <- newNames
501
-  out <- TPP_importFct_rmZeroSias(configTable = configTable, 
502
-                                  data.list = dataList,
503
-                                  intensityStr = intensityStr)
552
+  out <- .TPP_importFct_rmZeroSias(data.list = dataList,
553
+                                   intensityStr = intensityStr)
504 554
   return(out)
505 555
 }
506 556
 
557
+#' Tranform configuration table from wide to long
558
+#' 
559
+#' @param configWide data frame containing a config table
560
+#' @return data frame containing config table in long format 
561
+#' 
507 562
 #' @importFrom tidyr gather
563
+#' @examples 
564
+#' data("config_tab")
565
+#' configWide2Long(configWide = config_tab)
566
+#' 
567
+#' @export
508 568
 configWide2Long <- function(configWide){
509
-  # internal function to tranform config table into long format
510
-  
511
-  Path <- label <- conc <- Compound <- Experiment <- 
569
+ Path <- label <- conc <- Compound <- Experiment <- 
512 570
     Temperature <- RefCol <- NULL
513 571
   
514 572
   if(any(grepl("Path", colnames(configWide)))){
... ...
@@ -525,11 +583,45 @@ configWide2Long <- function(configWide){
525 583
   }
526 584
 }
527 585
 
586
+#' Annotate imported data list using a config table
587
+#' @param dataList list of datasets from different MS runs 
588
+#' corresponding to a 2D-TPP dataset
589
+#' @param geneNameVar character string of the column name that describes
590
+#' the gene name of a given protein in the raw data files
591
+#' @param configLong long formatted data frame of a corresponding
592
+#' config table
593
+#' @param intensityStr character string indicating which columns contain 
594
+#' raw intensities measurements
595
+#' @param fcStr character string indicating which columns contain the actual 
596
+#' fold change values. Those column names containing the suffix \code{fcStr} 
597
+#' will be regarded as containing fold change values.
598
+#' 
599
+#' @return data frame containing all data annotated
600
+#' by information supplied in the config table
601
+#'   
528 602
 #' @importFrom tidyr spread
603
+#' 
604
+#' @examples 
605
+#' data("config_tab")
606
+#' data("raw_dat_list")
607
+#' dataList <- import2dMain(configTable = config_tab,
608
+#'                          data = raw_dat_list,
609
+#'                          idVar = "protein_id",
610
+#'                          fcStr = "rel_fc_",
611
+#'                          addCol = "gene_name",
612
+#'                          naStrs = NA,
613
+#'                          intensityStr = "signal_sum_",
614
+#'                          nonZeroCols = "qusm",
615
+#'                          qualColName = "qupm")
616
+#' configLong <- configWide2Long(configWide = config_tab)
617
+#' annotateDataList(dataList = dataList,
618
+#'                  geneNameVar = "gene_name",
619
+#'                  configLong = configLong,
620
+#'                  intensityStr = "signal_sum_",
621
+#'                  fcStr = "rel_fc_")
622
+#' @export
529 623
 annotateDataList <- function(dataList, geneNameVar, configLong,
530 624
                              intensityStr, fcStr){
531
-  # internal function to annotate list of 2D-TPP data subtables with
532
-  # information from config table
533 625
   channel <- signal <- Temperature <- RefCol <- label <- 
534 626
     conc <- unique_ID <- spread_var <- NULL
535 627
   
... ...
@@ -548,13 +640,25 @@ annotateDataList <- function(dataList, geneNameVar, configLong,
548 640
   return(combinedTab)
549 641
 }
550 642
 
643
+#' Filter out contaminants
644
+#' 
645
+#' @param dataLong long format data frame of imported dataset
646
+#' 
647
+#' @return data frame containing full dataset filtered to 
648
+#' contain no contaminants
649
+#' 
650
+#' @examples 
651
+#' data("simulated_cell_extract_df")
652
+#' filterOutContaminants(simulated_cell_extract_df)
653
+#' 
654
+#' @export
551 655
 filterOutContaminants <- function(dataLong){
552 656
   # internal function to filter out contaminants
553 657
   representative <- NULL
554 658
   filter(dataLong, !grepl("##", representative))
555 659
 }
556 660
 
557
-checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){
661
+.checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){
558 662
   # internal function to check that protein 
559 663
   # fold changes are computed
560 664
   # relative to the correct TMT channel
... ...
@@ -583,7 +687,7 @@ checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){
583 687
 }
584 688
 
585 689
 #' @importFrom stats median
586
-medianNormalizeRatios <- function(dataLong){
690
+.medianNormalizeRatios <- function(dataLong){
587 691
   # internal function to perform median normalization 
588 692
   # of ratios
589 693
   rel_value <- temperature <- conc <- 
... ...
@@ -599,17 +703,48 @@ medianNormalizeRatios <- function(dataLong){
599 703
   return(dataOut)
600 704
 }
601 705
 
706
+#' Rename columns of imported data frame
707
+#' 
708
+#' @param dataLong long format data frame of imported dataset
709
+#' @param idVar character string indicating which data column provides the 
710
+#' unique identifiers for each protein.
711
+#' @param geneNameVar character string of the column name that describes
712
+#' the gene name of a given protein in the raw data files
713
+#' 
714
+#' @return data frame containing imported data with renamed
715
+#' columns
716
+#' 
717
+#' @examples 
718
+#' data("config_tab")
719
+#' data("raw_dat_list")
720
+#' 
721
+#' dataList <- import2dMain(configTable = config_tab,
722
+#'                          data = raw_dat_list,
723
+#'                          idVar = "protein_id",
724
+#'                          fcStr = "rel_fc_",
725
+#'                          addCol = "gene_name",
726
+#'                          naStrs = NA,
727
+#'                          intensityStr = "signal_sum_",
728
+#'                          nonZeroCols = "qusm",
729
+#'                          qualColName = "qupm")
730
+#' configLong <- configWide2Long(configWide = config_tab)
731
+#' annoDat <- annotateDataList(dataList = dataList,
732
+#'                             geneNameVar = "gene_name",
733
+#'                             configLong = configLong,
734
+#'                             intensityStr = "signal_sum_",
735
+#'                             fcStr = "rel_fc_")
736
+#' renameColumns(annoDat, 
737
+#'               idVar = "protein_id", 
738
+#'               geneNameVar = "gene_name")
739
+#' @export
602 740
 renameColumns <- function(dataLong, idVar, geneNameVar){
603
-  # internal function to rename column names to 
604
-  # match lazyeval variable
605
-  # names of main function
606 741
   clustername <- representative <- NULL
607 742
   
608 743
   dplyr::rename_(dataLong, "representative" = idVar, 
609 744
                  "clustername" = geneNameVar) %>%
610 745
     group_by(clustername) %>%
611 746
     mutate(representative =
612
-             paste_rmNA(unique(unlist(strsplit(representative, 
747
+             .paste_rmNA(unique(unlist(strsplit(representative, 
613 748
                                                split = "\\|"))), 
614 749
                         sep = "|")) %>%
615 750
     ungroup()
... ...
@@ -698,12 +833,12 @@ import2dDataset <- function(configTable, data,
698 833
                                intensityStr = intensityStr,
699 834
                                fcStr = fcStr)
700 835
   
701
-  dataRatioChecked <- checkRatioRef(dataLong, idVar = idVar,
702
-                                    concFactor = concFactor)
836
+  dataRatioChecked <- .checkRatioRef(dataLong, idVar = idVar,
837
+                                     concFactor = concFactor)
703 838
   
704 839
   if(medianNormalizeFC){
705 840
     message("Median normalizing fold changes...")
706
-    dataNorm <- medianNormalizeRatios(dataRatioChecked)
841
+    dataNorm <- .medianNormalizeRatios(dataRatioChecked)
707 842
   }else{
708 843
     dataNorm <- dataRatioChecked
709 844
   }
Browse code

started adressing BiocCheck notes

Nils Kurzawa authored on 11/03/2019 17:18:46
Showing1 changed files
... ...
@@ -1,46 +1,48 @@
1 1
 TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){
2
-  # internal function copied from TPP package to avoid 
3
-  # import of non-exported package functions
4
-  . <- NULL
5
-  isDF <- !is.null(dataframes)
6
-  isF <- !is.null(files)
7
-  isBoth <- isDF & isF
8
-  isNone <- !(isDF | isF)
9
-  if (isBoth) {
10
-    stop("Data import function received a filename AND a dataframe object. \n
11
-         Please specify only one.")
12
-  }
13
-  else if (isNone) {
14
-    stop("Data import function requires a filename or a dataframe object. \n
15
-         Please specify one.")
16
-  }
17
-  if (isDF) {
18
-    isClassList <- is.list(dataframes) && !is.data.frame(dataframes)
19
-    isClassDF <- is.data.frame(dataframes)
20
-    if (isClassList) {
21
-      classesInList <- dataframes %>% 
22
-        vapply(. %>% inherits(., "data.frame"), TRUE)
23
-      if (!all(classesInList)) {
24
-        stop(paste("Argument 'dataframes' contains elements that are", 
25
-                   "not of type 'data.frame' at the following positions: "), 
26
-             which(!classesInList) %>% paste(collapse = ", "), 
27
-             ".")
28
-      }
2
+    # internal function copied from TPP package to avoid 
3
+    # import of non-exported package functions
4
+    . <- NULL
5
+    isDF <- !is.null(dataframes)
6
+    isF <- !is.null(files)
7
+    isBoth <- isDF & isF
8
+    isNone <- !(isDF | isF)
9
+    if (isBoth) {
10
+        stop("Data import function received a",
11
+             " filename AND a dataframe object. \n",
12
+             "Please specify only one.")
29 13
     }
30
-    else if (isClassDF) {
31
-      dataframes <- list(dataframes)
32
-      names(dataframes) <- expNames
14
+    else if (isNone) {
15
+        stop("Data import function requires a", 
16
+             " filename or a dataframe object. \n",
17
+             "Please specify one.")
33 18
     }
34
-    else {
35
-      stop("Argument 'dataframes' must be either an object of class \n
36
-           'data.frame', or a list of such objects.")
19
+    if (isDF) {
20
+        isClassList <- is.list(dataframes) && !is.data.frame(dataframes)
21
+        isClassDF <- is.data.frame(dataframes)
22
+        if (isClassList) {
23
+            classesInList <- dataframes %>% 
24
+            vapply(. %>% inherits(., "data.frame"), TRUE)
25
+            if (!all(classesInList)) {
26
+                stop(paste("Argument 'dataframes' contains", 
27
+                           "elements that are not of type", 
28
+                           "'data.frame' at the following positions: "), 
29
+                     which(!classesInList) %>% paste(collapse = ", "), ".")
30
+            }
31
+          }
32
+          else if (isClassDF) {
33
+              dataframes <- list(dataframes)
34
+              names(dataframes) <- expNames
35
+          }
36
+          else {
37
+              stop("Argument 'dataframes' must be either an object of class \n
38
+                   'data.frame', or a list of such objects.")
39
+          }
37 40
     }
38
-  }
39
-  if (isF) {
40
-    files <- as.character(files)
41
-    names(files) <- expNames
42
-  }
43
-  return(list(files = files, dataframes = dataframes))
41
+    if (isF) {
42
+        files <- as.character(files)
43
+        names(files) <- expNames
44
+    }
45
+    return(list(files = files, dataframes = dataframes))
44 46
 }
45 47
 
46 48
 #' @importFrom utils read.delim
... ...
@@ -95,7 +97,8 @@ TPP_importFct_removeDuplicates <- function(inDF, refColName,
95 97
     }
96 98
     retDF = rbind(retDF, tmpDF[useRow, ])
97 99
   }
98
-  message(nrow(retDF), " out of ", nrow(inDF), " rows kept for further analysis.")
100
+  message(nrow(retDF), " out of ", nrow(inDF), 
101
+          " rows kept for further analysis.")
99 102
   return(retDF)
100 103
 }
101 104
 
... ...
@@ -128,14 +131,16 @@ TPP_importFct_checkExperimentCol <- function(expCol){
128 131
   # internal function copied from TPP package to avoid 
129 132
   # import of non-exported package functions
130 133
   if (is.null(expCol)) {
131
-    m <- "Config table needs an 'Experiment' column with unique experiment IDs."
134
+    m <- paste("Config table needs an 'Experiment'", 
135
+               "column with unique experiment IDs.")
132 136
     stop(m, "\n")
133 137
   }
134 138
   oldExpNames <- expCol
135 139
   newExpNames <- gsub("([^[:alnum:]])", "_", expCol)
136 140
   iChanged <- oldExpNames != newExpNames
137 141
   if (any(iChanged)) {
138
-    m1 <- "Replaced non-alphanumeric characters in the 'Experiment' column entries:"
142
+    m1 <- paste("Replaced non-alphanumeric characters", 
143
+                "in the 'Experiment' column entries:")
139 144
     m2 <- paste("'", paste(oldExpNames[iChanged], collapse = "', '"), 
140 145
                 "'\nby\n'", paste(newExpNames[iChanged], collapse = "', '"), 
141 146
                 sep = "")
... ...
@@ -197,32 +202,42 @@ TPP_importFct_checkConditions <- function(condInfo,
197 202
   # import of non-exported package functions
198 203
   flagGenerateConds <- FALSE
199 204
   if (is.null(condInfo)) {
200
-    message("No information about experimental conditions given. Assigning NA instead.\n
201
-            Reminder: recognition of Vehicle and Treatment groups during pairwise \n
202
-            comparisons is only possible when they are specified in the config table.\n")
205
+    message("No information about experimental conditions given.", 
206
+            "Assigning NA instead.\n",
207
+            "Reminder: recognition of Vehicle and Treatment groups", 
208
+            "during pairwise \n",
209
+            "comparisons is only possible when they are specified ",
210
+            "in the config table.\n")
203 211
     condInfo <- rep(NA_character_, expectedLength)
204 212
   }
205 213
   else {
206
-    condInfo <- as.character(condInfo) %>% stringr::str_to_title()
214
+    condInfo <- as.character(condInfo) %>% 
215
+      stringr::str_to_title()
207 216
     condLevels <- unique(condInfo)
208
-    invalidLevels = setdiff(condLevels, c("Treatment", "Vehicle"))
217
+    invalidLevels = 
218
+      setdiff(condLevels, c("Treatment", "Vehicle"))
209 219
     if (length(invalidLevels) > 0) {
210 220
       stop("The entry '", invalidLevels, 
211
-           paste("' in the condition column is invalid. Only the values 'Treatment' and", 
212
-                 "'Vehicle' are allowed. Please correct this and start again."))
221
+           paste("' in the condition column is invalid.", 
222
+                 "Only the values 'Treatment' and", 
223
+                 "'Vehicle' are allowed. Please correct", 
224
+                 "this and start again."))
213 225
     }
214 226
   }
215 227
   return(condInfo)
216 228
 }
217 229
 
218
-TPP_checkFunctionArgs <- function(functionCall, expectedArguments){
230
+TPP_checkFunctionArgs <- 
231
+  function(functionCall, expectedArguments){
219 232
   # internal function copied from TPP package to avoid 
220 233
   # import of non-exported package functions
221 234
   myArgs <- names(functionCall)
222 235
   lapply(expectedArguments, function(arg) {
223 236
     if (!arg %in% myArgs) {
224
-      stop("Error in ", paste(functionCall)[1], ": argument '", 
225
-           arg, "' is missing, with no default", call. = FALSE)
237
+      stop("Error in ", paste(functionCall)[1], 
238
+           ": argument '", 
239
+           arg, "' is missing, with no default", 
240
+           call. = FALSE)
226 241
     }
227 242
   })
228 243
 }
... ...
@@ -244,11 +259,13 @@ TPP_nonLabelColumns <- function(){
244 259
   return(out)
245 260
 }
246 261
 
247
-TPP_detectLabelColumnsInConfigTable <- function(allColumns){
262
+TPP_detectLabelColumnsInConfigTable <- 
263
+  function(allColumns){
248 264
   # internal function copied from TPP package to avoid 
249 265
   # import of non-exported package functions
250 266
   TPP_checkFunctionArgs(match.call(), c("allColumns"))
251
-  noLabelCols <- TPP_nonLabelColumns()$column %>% as.character %>% 
267
+  noLabelCols <- TPP_nonLabelColumns()$column %>% 
268
+    as.character %>% 
252 269
     unique
253 270
   compCols <- grep("comparison", allColumns, value = TRUE, 
254 271
                    ignore.case = TRUE)
... ...
@@ -265,7 +282,8 @@ TPP_importCheckTemperatures <- function(temp){
265 282
   naRows <- apply(is.na(tempMatrix), 1, all)
266 283
   if (any(naRows)) {
267 284
     stop("Row(s) ", paste(which(naRows), collapse = ", "), 
268
-         " in the configuration table contain only missing temperature values.")
285
+         " in the configuration table contain", 
286
+         " only missing temperature values.")
269 287
   }
270 288
   return(tempMatrix)
271 289
 }
... ...
@@ -280,24 +298,28 @@ TPP_importFct_readConfigTable <- function(cfg){
280 298
       strChunks <- strsplit(cfg, "\\.")[[1]]
281 299
       fileExtension <- strChunks[length(strChunks)]
282 300
       if (fileExtension == "txt") {
283
-        tab <- read.table(file = cfg, header = TRUE, 
284
-                          check.names = FALSE, stringsAsFactors = FALSE, 
285
-                          sep = "\t")
301
+        tab <- read.table(
302
+          file = cfg, header = TRUE, 
303
+          check.names = FALSE, stringsAsFactors = FALSE, 
304
+          sep = "\t")
286 305
       }
287 306
       else if (fileExtension == "csv") {
288
-        tab <- read.table(file = cfg, header = TRUE, 
289
-                          check.names = FALSE, stringsAsFactors = FALSE, 
290
-                          sep = ",")
307
+        tab <- read.table(
308
+          file = cfg, header = TRUE, 
309
+          check.names = FALSE, stringsAsFactors = FALSE, 
310
+          sep = ",")
291 311
       }
292 312
       else if (fileExtension == "xlsx") {
293 313
         tab <- openxlsx::read.xlsx(cfg)
294 314
       }
295 315
       else {
296
-        stop("Error during data import: ", cfg, " does not belong to a valid configuration file.")
316
+        stop("Error during data import: ", cfg, 
317
+             " does not belong to a valid configuration file.")
297 318
       }
298 319
     }
299 320
     else {
300
-      stop("Error during data import: ", cfg, " does not belong to a valid configuration file.")
321
+      stop("Error during data import: ", cfg, 
322
+           " does not belong to a valid configuration file.")
301 323
     }
302 324
     cfg <- tab
303 325
   }
... ...
@@ -311,21 +333,25 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){
311 333
   Experiment = Path = Compound <- NULL
312 334
   isValidDF <- FALSE
313 335
   if (is.data.frame(infoTable)) {
314
-    if ((ncol(infoTable) > 1) & ("Experiment" %in% colnames(infoTable))) {
336
+    if ((ncol(infoTable) > 1) & 
337
+        ("Experiment" %in% colnames(infoTable))) {
315 338
       isValidDF <- TRUE
316 339
     }
317 340
   }
318 341
   if (!is.character(infoTable) & !isValidDF) {
319
-    stop("'infoTable' must either be a data frame with an 'Experiment' column \n
320
-         and at least one isobaric label column, or a filename pointing at a \n
321
-         table that fulfills the same criteria")
342
+    stop("'infoTable' must either be a data frame", 
343
+         " with an 'Experiment' column \n",
344
+         "and at least one isobaric label column,", 
345
+         "or a filename pointing at a \n",
346
+         "table that fulfills the same criteria")
322 347
   }
323 348
   isValidType <- type %in% c("2D")
324 349
   if (!isValidType) {
325 350
     stop("'type' must have this value: '2D'")
326 351
   }
327 352
   infoTable <- TPP_importFct_readConfigTable(cfg = infoTable)
328
-  infoTable$Experiment <- TPP_importFct_checkExperimentCol(infoTable$Experiment)
353
+  infoTable$Experiment <- 
354
+    TPP_importFct_checkExperimentCol(infoTable$Experiment)
329 355
   infoTable <- subset(infoTable, Experiment != "")
330 356
   givenPaths <- NULL
331 357
   if (any("Path" %in% colnames(infoTable))) {
... ...
@@ -360,31 +386,38 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){
360 386
   if (type == "2D") {
361 387
     temperatures <- infoTable$Temperature
362 388
     if (is.null(temperatures) | length(temperatures) < 2) {
363
-      m1 <- "Insufficient temperatures (<2) specified in config file."
364
-      m2 <- "Does your configuration table have the correct column names?"
389
+      m1 <- paste("Insufficient temperatures (<2)", 
390
+                  "specified in config file.")
391
+      m2 <- paste("Does your configuration table", 
392
+                  "have the correct column names?")
365 393
       stop(m1, "\n", m2)
366 394
     }
367 395
     else if (length(which(!infoTable$RefCol %in% labelColsNew)) != 
368 396
              0) {
369
-      stop("Labels in reference column not found in any of teh label columns.")
397
+      stop("Labels in reference column not found", 
398
+           "in any of teh label columns.")
370 399
     }
371 400
     hasCompoundCol <- any(allCols == "Compound")
372 401
     if (!hasCompoundCol) {
373
-      m <- "Config table of a 2D-TPP experiment needs a 'Compound' column."
402
+      m <- paste("Config table of a 2D-TPP experiment", 
403
+                 "needs a 'Compound' column.")
374 404
       stop(m, "\n")
375 405
     }
376 406
     else {
377
-      infoTable <- infoTable %>% mutate(Compound = gsub("([^[:alnum:]])", 
378
-                                                        "_", Compound))
407
+      infoTable <- infoTable %>% 
408
+        mutate(Compound = 
409
+                 gsub("([^[:alnum:]])", "_", Compound))
379 410
     }
380 411
     out <- infoTable
381 412
   }
382 413
   else {
383 414
     temperatures <- subset(infoTable, select = labelColsNew)
384 415
     tempMatrix <- TPP_importCheckTemperatures(temp = temperatures)
385
-    infoList <- list(expNames = as.character(infoTable$Experiment), 
386
-                     expCond = infoTable$Condition, files = givenPaths, 
387
-                     compStrs = compStrs, labels = labelColsNew, tempMatrix = tempMatrix)
416
+    infoList <- list(
417
+      expNames = as.character(infoTable$Experiment), 
418
+      expCond = infoTable$Condition, files = givenPaths, 
419
+      compStrs = compStrs, labels = labelColsNew, 
420
+      tempMatrix = tempMatrix)
388 421
     out <- infoList
389 422
   }
390 423
   return(out)
... ...
@@ -504,9 +537,11 @@ annotateDataList <- function(dataList, geneNameVar, configLong,
504 537
     datLong <- dat %>% tbl_df() %>%
505 538
       gather(channel, signal, matches(intensityStr), matches(fcStr)) %>%
506 539
       mutate(label = gsub(fcStr, "", gsub(intensityStr, "", channel))) %>%
507
-      left_join(configLong %>% dplyr::select(Temperature, RefCol, label, conc),
540
+      left_join(configLong %>% 
541
+                  dplyr::select(Temperature, RefCol, label, conc),
508 542
                 by = c("temperature" = "Temperature", "label")) %>%
509
-      mutate(spread_var = ifelse(grepl(fcStr, channel), "rel_value", "raw_value")) %>%
543
+      mutate(spread_var = 
544
+               ifelse(grepl(fcStr, channel), "rel_value", "raw_value")) %>%
510 545
       dplyr::select(-channel, -unique_ID) %>%
511 546
       spread(spread_var, signal)
512 547
   }))
... ...
@@ -520,11 +555,13 @@ filterOutContaminants <- function(dataLong){
520 555
 }
521 556
 
522 557
 checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){
523
-  # internal function to check that protein fold changes are computed
558
+  # internal function to check that protein 
559
+  # fold changes are computed
524 560
   # relative to the correct TMT channel
525 561
   label <- RefCol <- rel_value <- raw_value <- conc <- NULL
526 562
   
527
-  if(!all(filter(dataLong, label == RefCol)$rel_value == 1, na.rm = TRUE)){
563
+  if(!all(filter(dataLong, label == RefCol)$rel_value == 1, 
564
+          na.rm = TRUE)){
528 565
     message("Recomputing ratios!")
529 566
     dataOut <- dataLong %>%
530 567
       dplyr::group_by_(idVar, "temperature") %>%
... ...
@@ -547,7 +584,8 @@ checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){
547 584
 
548 585
 #' @importFrom stats median
549 586
 medianNormalizeRatios <- function(dataLong){
550
-  # internal function to perform median normalization of ratios
587
+  # internal function to perform median normalization 
588
+  # of ratios
551 589
   rel_value <- temperature <- conc <- 
552 590
     raw_rel_value <- NULL
553 591
   
... ...
@@ -562,7 +600,8 @@ medianNormalizeRatios <- function(dataLong){
562 600
 }
563 601
 
564 602
 renameColumns <- function(dataLong, idVar, geneNameVar){
565
-  # internal function to rename column names to match lazyeval variable
603
+  # internal function to rename column names to 
604
+  # match lazyeval variable
566 605
   # names of main function
567 606
   clustername <- representative <- NULL
568 607
   
... ...
@@ -570,7 +609,8 @@ renameColumns <- function(dataLong, idVar, geneNameVar){
570 609
                  "clustername" = geneNameVar) %>%
571 610
     group_by(clustername) %>%
572 611
     mutate(representative =
573
-             paste_rmNA(unique(unlist(strsplit(representative, split = "\\|"))), 
612
+             paste_rmNA(unique(unlist(strsplit(representative, 
613
+                                               split = "\\|"))), 
574 614
                         sep = "|")) %>%
575 615
     ungroup()
576 616
 }
... ...
@@ -610,7 +650,8 @@ renameColumns <- function(dataLong, idVar, geneNameVar){
610 650
 #' @examples 
611 651
 #' data("config_tab")
612 652
 #' data("raw_dat_list")
613
-#' import_df <- import2dDataset(configTable = config_tab, data = raw_dat_list,
653
+#' import_df <- import2dDataset(configTable = config_tab, 
654
+#'                              data = raw_dat_list,
614 655
 #'                              idVar = "protein_id",
615 656
 #'                              intensityStr = "signal_sum_",
616 657
 #'                              fcStr = "rel_fc_",
... ...
@@ -637,7 +678,8 @@ import2dDataset <- function(configTable, data,
637 678
                             medianNormalizeFC = TRUE,
638 679
                             filterContaminants = TRUE){
639 680
   
640
-  configWide <- TPP_importCheckConfigTable(infoTable = configTable, type = "2D")
681
+  configWide <- TPP_importCheckConfigTable(
682
+    infoTable = configTable, type = "2D")
641 683
   configLong <- configWide2Long(configWide = configWide)
642 684
   
643 685
   dataList <- import2dMain(configTable = configWide,
Browse code

resolved remaining 'no visible binding' problems of variables and added test for model fits

Nils Kurzawa authored on 05/02/2019 16:06:20
Showing1 changed files
... ...
@@ -548,6 +548,9 @@ checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){
548 548
 #' @importFrom stats median
549 549
 medianNormalizeRatios <- function(dataLong){
550 550
   # internal function to perform median normalization of ratios
551
+  rel_value <- temperature <- conc <- 
552
+    raw_rel_value <- NULL
553
+  
551 554
   dataOut <- dataLong %>%
552 555
     rename(raw_rel_value = rel_value) %>%
553 556
     group_by(temperature, conc) %>%
... ...
@@ -561,6 +564,8 @@ medianNormalizeRatios <- function(dataLong){
561 564
 renameColumns <- function(dataLong, idVar, geneNameVar){
562 565
   # internal function to rename column names to match lazyeval variable
563 566
   # names of main function
567
+  clustername <- representative <- NULL
568
+  
564 569
   dplyr::rename_(dataLong, "representative" = idVar, 
565 570
                  "clustername" = geneNameVar) %>%
566 571
     group_by(clustername) %>%
Browse code

resolved more visible binding problems replaced example data once more to include correct concentrations for tru positives

Nils Kurzawa authored on 04/02/2019 18:09:22
Showing1 changed files
... ...
@@ -1,6 +1,7 @@
1 1
 TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){
2 2
   # internal function copied from TPP package to avoid 
3 3
   # import of non-exported package functions
4
+  . <- NULL
4 5
   isDF <- !is.null(dataframes)
5 6
   isF <- !is.null(files)
6 7
   isBoth <- isDF & isF
... ...
@@ -18,7 +19,7 @@ TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){
18 19
     isClassDF <- is.data.frame(dataframes)
19 20
     if (isClassList) {
20 21
       classesInList <- dataframes %>% 
21
-        sapply(. %>% inherits(., "data.frame"))
22
+        vapply(. %>% inherits(., "data.frame"), TRUE)
22 23
       if (!all(classesInList)) {
23 24
         stop(paste("Argument 'dataframes' contains elements that are", 
24 25
                    "not of type 'data.frame' at the following positions: "), 
... ...
@@ -43,6 +44,7 @@ TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){
43 44
 }
44 45
 
45 46
 #' @importFrom utils read.delim
47
+#' @importFrom RCurl url.exists
46 48
 TPP_importFct_readFiles <- function (files, naStrs){
47 49
   # internal function copied from TPP package to avoid 
48 50
   # import of non-exported package functions
... ...
@@ -74,7 +76,7 @@ TPP_importFct_removeDuplicates <- function(inDF, refColName,
74 76
   for (nU in nonUniques) {
75 77
     tmpDF = subset(inDF, get(refColName) == nU)
76 78
     nonNArows = NULL
77
-    for (r in 1:nrow(tmpDF)) {
79
+    for (r in seq_len(nrow(tmpDF))) {
78 80
       if (any(!is.na(tmpDF[r, nonNAColNames]))) {
79 81
         nonNArows = c(nonNArows, r)
80 82
       }
... ...
@@ -217,7 +219,7 @@ TPP_checkFunctionArgs <- function(functionCall, expectedArguments){
217 219
   # internal function copied from TPP package to avoid 
218 220
   # import of non-exported package functions
219 221
   myArgs <- names(functionCall)
220
-  sapply(expectedArguments, function(arg) {
222
+  lapply(expectedArguments, function(arg) {
221 223
     if (!arg %in% myArgs) {
222 224
       stop("Error in ", paste(functionCall)[1], ": argument '", 
223 225
            arg, "' is missing, with no default", call. = FALSE)
... ...
@@ -335,23 +337,8 @@ TPP_importCheckConfigTable <- function (infoTable, type = "2D"){
335 337
       givenPaths <- infoTable$Path
336 338
     }
337 339
   }
338
-  if (type == "TR") {
339
-    infoTable <- TPP_importFct_replaceReplicateColumn(cfg = infoTable)
340
-  }
341
-  if (type == "TR") {
342
-    compStrs <- TPP_importFct_checkComparisons(confgTable = infoTable)
343
-  }
344
-  else {
345
-    compStrs <- NA
346
-  }
347
-  if (type == "TR") {
348
-    infoTable$Condition <- TPP_importFct_checkConditions(
349
-      infoTable$Condition, 
350
-      nrow(infoTable))
351
-  }
352
-  else {
353
-    infoTable$Condition <- NULL
354
-  }
340
+  compStrs <- NA
341
+  infoTable$Condition <- NULL
355 342
   allCols <- colnames(infoTable)
356 343
   labelCols <- TPP_detectLabelColumnsInConfigTable(allColumns = allCols)
357 344
   labelValues <- infoTable[, labelCols]
... ...
@@ -470,13 +457,13 @@ import2dMain <- function(configTable, data, idVar, fcStr,
470 457
       mutate(temperature = tTmp, experiment = expTmp, unique_ID = idsAnnotated)
471 458
     return(dataFinal)
472 459
   })
473
-  newNames <- sapply(seq(nrow(configTable)), function(iTmp) {
460
+  newNames <- vapply(seq(nrow(configTable)), function(iTmp) {
474 461
     rowTmp <- configTable[iTmp, ]
475 462
     tTmp <- rowTmp$Temperature
476 463
     expTmp <- rowTmp$Experiment
477 464
     newName <- paste(expTmp, tTmp, sep = "_")
478 465
     return(newName)
479
-  })
466
+  }, "")
480 467
   names(dataList) <- newNames
481 468
   out <- TPP_importFct_rmZeroSias(configTable = configTable, 
482 469
                                   data.list = dataList,
... ...
@@ -487,14 +474,20 @@ import2dMain <- function(configTable, data, idVar, fcStr,
487 474
 #' @importFrom tidyr gather
488 475
 configWide2Long <- function(configWide){
489 476
   # internal function to tranform config table into long format
477
+  
478
+  Path <- label <- conc <- Compound <- Experiment <- 
479
+    Temperature <- RefCol <- NULL
480
+  
490 481
   if(any(grepl("Path", colnames(configWide)))){
491 482
     configLong <- configWide %>%
492 483
       dplyr::select(-Path) %>%
493
-      gather(label, conc, -Compound, -Experiment, -Temperature, -RefCol) %>%
484
+      gather(label, conc, -Compound, 
485
+             -Experiment, -Temperature, -RefCol) %>%
494 486
       filter(conc != "-")
495 487
   }else{
496 488
     configLong <- configWide %>%
497
-      gather(label, conc, -Compound, -Experiment, -Temperature, -RefCol) %>%
489
+      gather(label, conc, -Compound, 
490
+             -Experiment, -Temperature, -RefCol) %>%
498 491
       filter(conc != "-")
499 492
   }
500 493
 }
... ...
@@ -505,7 +498,7 @@ annotateDataList <- function(dataList, geneNameVar, configLong,
505 498
   # internal function to annotate list of 2D-TPP data subtables with
506 499
   # information from config table
507 500
   channel <- signal <- Temperature <- RefCol <- label <- 
508
-    conc <- unique_ID <- NULL
501
+    conc <- unique_ID <- spread_var <- NULL
509 502
   
510 503
   combinedTab <- bind_rows(lapply(dataList, function(dat){
511 504
     datLong <- dat %>% tbl_df() %>%
... ...
@@ -522,12 +515,15 @@ annotateDataList <- function(dataList, geneNameVar, configLong,
522 515
 
523 516
 filterOutContaminants <- function(dataLong){
524 517
   # internal function to filter out contaminants
518
+  representative <- NULL
525 519
   filter(dataLong, !grepl("##", representative))
526 520
 }
527 521
 
528 522
 checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){
529 523
   # internal function to check that protein fold changes are computed
530 524
   # relative to the correct TMT channel
525
+  label <- RefCol <- rel_value <- raw_value <- conc <- NULL
526
+  
531 527
   if(!all(filter(dataLong, label == RefCol)$rel_value == 1, na.rm = TRUE)){
532 528
     message("Recomputing ratios!")
533 529
     dataOut <- dataLong %>%
Browse code

added updated version of simulated cell_extract_df to match imported version, added updated example section for import function in function documentation.

Nils Kurzawa authored on 01/02/2019 11:37:31
Showing1 changed files
... ...
@@ -428,7 +428,7 @@ import2dMain <- function(configTable, data, idVar, fcStr,
428 428
   }
429 429
   configTable %>% group_by(Experiment, Compound,
430 430
                            Temperature, RefCol)
431
-  iVec <- 1:nrow(configTable)
431
+  iVec <- seq_len(nrow(configTable))
432 432
   dataList <- lapply(iVec, function(iTmp) {
433 433
     rowTmp <- configTable[iTmp, ]
434 434
     expTmp <- rowTmp$Experiment
... ...
@@ -606,6 +606,21 @@ renameColumns <- function(dataLong, idVar, geneNameVar){
606 606
 #' 
607 607
 #' @return tidy data frame representing a 2D-TPP dataset
608 608
 #' 
609
+#' @examples 
610
+#' data("config_tab")
611
+#' data("raw_dat_list")
612
+#' import_df <- import2dDataset(configTable = config_tab, data = raw_dat_list,
613
+#'                              idVar = "protein_id",
614
+#'                              intensityStr = "signal_sum_",
615
+#'                              fcStr = "rel_fc_",
616
+#'                              nonZeroCols = "qusm",
617
+#'                              geneNameVar = "gene_name",
618
+#'                              addCol = NULL,
619
+#'                              qualColName = "qupm",
620
+#'                              naStrs = c("NA", "n/d", "NaN"),
621
+#'                              concFactor = 1e6,
622
+#'                              medianNormalizeFC = TRUE,
623
+#'                              filterContaminants = TRUE)
609 624
 #' 
610 625
 #' @export
611 626
 import2dDataset <- function(configTable, data,
... ...
@@ -614,7 +629,7 @@ import2dDataset <- function(configTable, data,
614 629
                             fcStr = "rel_fc_protein_",
615 630
                             nonZeroCols = "qssm",
616 631
                             geneNameVar = "clustername",
617
-                            addCol = "",
632
+                            addCol = NULL,
618 633
                             qualColName = "qupm",
619 634
                             naStrs = c("NA", "n/d", "NaN"),
620 635
                             concFactor = 1e6,
Browse code

more bioccheck warnings eliminated installation guide and short package description added to readme

Nils Kurzawa authored on 31/01/2019 20:16:41
Showing1 changed files
... ...
@@ -604,6 +604,9 @@ renameColumns <- function(dataLong, idVar, geneNameVar){
604 604
 #' @param concFactor numeric value that indicates how concentrations need to 
605 605
 #' be adjusted to yield total unit e.g. default mmol - 1e6
606 606
 #' 
607
+#' @return tidy data frame representing a 2D-TPP dataset
608
+#' 
609
+#' 
607 610
 #' @export
608 611
 import2dDataset <- function(configTable, data,
609 612
                             idVar = "representative",
Browse code

started fixing biocCheck errors

Nils Kurzawa authored on 30/01/2019 17:04:20
Showing1 changed files
... ...
@@ -499,19 +499,23 @@ configWide2Long <- function(configWide){
499 499
   }
500 500
 }
501 501
 
502
+#' @importFrom tidyr spread
502 503
 annotateDataList <- function(dataList, geneNameVar, configLong,
503 504
                              intensityStr, fcStr){
504 505
   # internal function to annotate list of 2D-TPP data subtables with
505 506
   # information from config table
507
+  channel <- signal <- Temperature <- RefCol <- label <- 
508
+    conc <- unique_ID <- NULL
509
+  
506 510
   combinedTab <- bind_rows(lapply(dataList, function(dat){
507 511
     datLong <- dat %>% tbl_df() %>%
508 512
       gather(channel, signal, matches(intensityStr), matches(fcStr)) %>%
509 513
       mutate(label = gsub(fcStr, "", gsub(intensityStr, "", channel))) %>%
510 514
       left_join(configLong %>% dplyr::select(Temperature, RefCol, label, conc),
511 515
                 by = c("temperature" = "Temperature", "label")) %>%
512
-      mutate(var = ifelse(grepl(fcStr, channel), "rel_value", "raw_value")) %>%
516
+      mutate(spread_var = ifelse(grepl(fcStr, channel), "rel_value", "raw_value")) %>%
513 517
       dplyr::select(-channel, -unique_ID) %>%
514
-      spread(var, signal)
518
+      spread(spread_var, signal)
515 519
   }))
516 520
   return(combinedTab)
517 521
 }
... ...
@@ -545,12 +549,14 @@ checkRatioRef <- function(dataLong, idVar, concFactor = 1e6){
545 549
   }
546 550
 }
547 551
 
552
+#' @importFrom stats median
548 553
 medianNormalizeRatios <- function(dataLong){
549 554
   # internal function to perform median normalization of ratios
550 555
   dataOut <- dataLong %>%
551 556
     rename(raw_rel_value = rel_value) %>%
552 557
     group_by(temperature, conc) %>%
553
-    mutate(rel_value = raw_rel_value / median(raw_rel_value, na.rm = TRUE)) %>%
558
+    mutate(rel_value = raw_rel_value / 
559
+             median(raw_rel_value, na.rm = TRUE)) %>%
554 560
     ungroup()
555 561
   
556 562
   return(dataOut)
Browse code

removed TPP package dependency

Nils Kurzawa authored on 25/01/2019 16:33:43
Showing1 changed files
... ...
@@ -1,4 +1,408 @@
1
-#' @import TPP
1
+TPP_importFct_CheckDataFormat <- function (files, dataframes, expNames){
2
+  # internal function copied from TPP package to avoid 
3
+  # import of non-exported package functions
4
+  isDF <- !is.null(dataframes)
5
+  isF <- !is.null(files)
6
+  isBoth <- isDF & isF
7
+  isNone <- !(isDF | isF)
8
+  if (isBoth) {
9
+    stop("Data import function received a filename AND a dataframe object. \n
10
+         Please specify only one.")
11
+  }
12
+  else if (isNone) {
13
+    stop("Data import function requires a filename or a dataframe object. \n
14
+         Please specify one.")
15
+  }
16
+  if (isDF) {
17
+    isClassList <- is.list(dataframes) && !is.data.frame(dataframes)
18
+    isClassDF <- is.data.frame(dataframes)
19
+    if (isClassList) {
20
+      classesInList <- dataframes %>% 
21
+        sapply(. %>% inherits(., "data.frame"))
22
+      if (!all(classesInList)) {
23
+        stop(paste("Argument 'dataframes' contains elements that are", 
24
+                   "not of type 'data.frame' at the following positions: "), 
25
+             which(!classesInList) %>% paste(collapse = ", "), 
26
+             ".")
27
+      }
28
+    }
29
+    else if (isClassDF) {
30
+      dataframes <- list(dataframes)
31
+      names(dataframes) <- expNames
32
+    }
33
+    else {
34
+      stop("Argument 'dataframes' must be either an object of class \n
35
+           'data.frame', or a list of such objects.")
36
+    }
37
+  }
38
+  if (isF) {
39
+    files <- as.character(files)
40
+    names(files) <- expNames
41
+  }
42
+  return(list(files = files, dataframes = dataframes))
43
+}
44
+
45
+#' @importFrom utils read.delim
46
+TPP_importFct_readFiles <- function (files, naStrs){
47
+  # internal function copied from TPP package to avoid 
48
+  # import of non-exported package functions
49
+  expNames <- names(files)
50
+  data <- vector("list", length(files))
51
+  names(data) <- expNames
52
+  for (expName in expNames) {
53
+    fTmp <- files[[expName]]
54
+    if (file.exists(fTmp) || url.exists(fTmp)) {
55
+      data[[expName]] <- read.delim(fTmp, as.is = TRUE, 
56
+                                    na.strings = naStrs, quote = "")
57
+    }
58
+    else {
59
+      stop("File ", fTmp, " could not be found.")
60
+    }
61
+  }
62
+  return(data)
63
+}
64
+
65
+TPP_importFct_removeDuplicates <- function(inDF, refColName, 
66
+                                           nonNAColNames, qualColName){
67
+  # internal function copied from TPP package to avoid 
68
+  # import of non-exported package functions
69
+  message("Removing duplicate identifiers using quality column '", 
70
+          qualColName, "'...")
71
+  nonUniques = unique(inDF[duplicated(inDF[[refColName]]), 
72
+                           refColName])
73
+  retDF = subset(inDF, !(get(refColName) %in% nonUniques))
74
+  for (nU in nonUniques) {
75
+    tmpDF = subset(inDF, get(refColName) == nU)
76
+    nonNArows = NULL
77
+    for (r in 1:nrow(tmpDF)) {
78
+      if (any(!is.na(tmpDF[r, nonNAColNames]))) {
79
+        nonNArows = c(nonNArows, r)
80
+      }
81
+    }
82
+    if (length(nonNArows) > 1) {
83
+      if (is.null(qualColName)) {
84
+        useRow = 1
85
+      }
86
+      else {
87
+        qualVals = tmpDF[nonNArows, qualColName]
88
+        useRow = match(max(qualVals), qualVals)
89
+      }
90
+    }
91
+    else {
92
+      useRow = nonNArows[1]
93
+    }
94
+    retDF = rbind(retDF, tmpDF[useRow, ])
95
+  }
96
+  message(nrow(retDF), " out of ", nrow(inDF), " rows kept for further analysis.")
97
+  return(retDF)
98
+}
99
+
100
+TPP_replaceZeros <- function(x){
101
+  # internal function copied from TPP package to avoid 
102
+  # import of non-exported package functions
103
+  x[which(x == 0)] <- NA
104
+  return(x)
105
+}
106
+
107
+TPP_importFct_rmZeroSias <- function(configTable, data.list, 
108
+                                     intensityStr){
109
+  # internal function copied from TPP package to avoid 
110
+  # import of non-exported package functions
111
+  out <- lapply(names(data.list), function(l.name) {
112
+    datTmp <- data.list[[l.name]]
113
+    colsTmp <- colnames(datTmp)
114
+    intensity.cols <- grep(intensityStr, colsTmp, value = TRUE)
115
+    intensity.df <- subset(datTmp, select = intensity.cols) %>% 
116
+      mutate_all(as.character) %>% mutate_all(as.numeric)
117
+    new.intensity.df <- intensity.df %>% mutate_all(TPP_replaceZeros)
118
+    datTmp[, intensity.cols] <- new.intensity.df
119
+    return(datTmp)
120
+  })
121
+  names(out) <- names(data.list)
122
+  return(out)
123
+}
124
+
125
+TPP_importFct_checkExperimentCol <- function(expCol){
126
+  # internal function copied from TPP package to avoid 
127
+  # import of non-exported package functions
128
+  if (is.null(expCol)) {
129
+    m <- "Config table needs an 'Experiment' column with unique experiment IDs."
130
+    stop(m, "\n")
131
+  }
132
+  oldExpNames <- expCol
133
+  newExpNames <- gsub("([^[:alnum:]])", "_", expCol)
134
+  iChanged <- oldExpNames != newExpNames
135
+  if (any(iChanged)) {
136
+    m1 <- "Replaced non-alphanumeric characters in the 'Experiment' column entries:"
137
+    m2 <- paste("'", paste(oldExpNames[iChanged], collapse = "', '"), 
138
+                "'\nby\n'", paste(newExpNames[iChanged], collapse = "', '"), 
139
+                sep = "")
140
+    message(m1, "\n", m2, "\n")
141
+  }
142
+  return(newExpNames)
143
+}
144
+
145
+TPP_importFct_checkComparisons <- function(confgTable){
146
+  # internal function copied from TPP package to avoid 
147
+  # import of non-exported package functions
148
+  expConds <- confgTable$Condition
149
+  expNames <- confgTable$Experiment
150
+  compCols <- grep("Comparison", colnames(confgTable), ignore.case = TRUE, 
151
+                   value = TRUE)
152
+  compChars <- apply(confgTable[compCols], 2, function(x) {
153
+    length(grep("[[:alnum:]]", x, value = TRUE))
154
+  })
155
+  comp_unequal_two <- compChars != 2
156
+  if (any(comp_unequal_two)) {
157
+    warning(paste("\nThe following comparison columns could not be evaluated", 
158
+                  "because they did not contain exactly two entries:\n\t\t"), 
159
+            paste(compCols[comp_unequal_two], collapse = ",\n\t\t"))
160
+  }
161
+  validCompCols <- compCols[!comp_unequal_two]
162
+  allCompStrs <- c()
163
+  if (length(validCompCols) > 0) {
164
+    message("Comparisons will be performed between the following experiments:")
165
+    for (colName in validCompCols) {
166
+      current_compEntries <- confgTable[[colName]]
167
+      current_compRows <- grep("[[:alnum:]]", current_compEntries)