6 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "064EA836", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/R/launch_protGear_interactive.R", |
|
4 |
- "project_path": "R/launch_protGear_interactive.R", |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647521182528.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 9, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "52,0", |
|
16 |
- "scrollLine": "47" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1647760747, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1647760747, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,64 +0,0 @@ |
1 |
-#' launch_protGear_interactive |
|
2 |
-#' |
|
3 |
-#' This is Function is to launch the shiny application |
|
4 |
-#' @return launches the shiny interactive protGear app |
|
5 |
-#' @import rmarkdown shiny GGally pheatmap knitr |
|
6 |
-#' grid styler factoextra FactoMineR remotes |
|
7 |
-#' @importFrom flexdashboard renderValueBox valueBoxOutput valueBox |
|
8 |
-#' @importFrom shinydashboard renderInfoBox |
|
9 |
-#' @importFrom dplyr group_rows between first last |
|
10 |
-#' @importFrom kableExtra text_spec |
|
11 |
-#' @export |
|
12 |
-#' @examples |
|
13 |
-#'launch_protGear_interactive() |
|
14 |
-launch_protGear_interactive <- function() { |
|
15 |
- appDir <- |
|
16 |
- system.file("shiny-examples", |
|
17 |
- "protGear_interactive", |
|
18 |
- "protGear_interactive.Rmd", |
|
19 |
- package = "protGear") |
|
20 |
- if (appDir == "") { |
|
21 |
- stop("Could not find example directory. Try re-installing `protGear`.", |
|
22 |
- call. = FALSE) |
|
23 |
- } |
|
24 |
- |
|
25 |
- rmarkdown::run(file = appDir) |
|
26 |
-} |
|
27 |
- |
|
28 |
-#' launch_select |
|
29 |
-#' |
|
30 |
-#' This is Function is to launch mutiple shiny applications for protGear |
|
31 |
-#' @param theApp accepts one of the folders containing the shiny appplication |
|
32 |
-#' @return launches the app defined under theApp |
|
33 |
-#' @export |
|
34 |
-#' @examples |
|
35 |
-#' launch_select('protGear_interactive') |
|
36 |
-launch_select <- function(theApp) { |
|
37 |
- # locate all the shiny app examples that exist |
|
38 |
- validExamples <- |
|
39 |
- list.files(system.file("shiny-examples", package = "protGear")) |
|
40 |
- |
|
41 |
- validExamplesMsg <- |
|
42 |
- paste0("The available apps in `protGear` are: '", |
|
43 |
- paste(validExamples, collapse = "', '"), |
|
44 |
- "'") |
|
45 |
- |
|
46 |
- # if an invalid folder is given, throw an error |
|
47 |
- if (missing(theApp) || !nzchar(theApp) || |
|
48 |
- !theApp %in% validExamples) { |
|
49 |
- stop( |
|
50 |
- 'Please run `launch_select()` with a valid example app as an argument.\n', |
|
51 |
- validExamplesMsg, |
|
52 |
- call. = FALSE |
|
53 |
- ) |
|
54 |
- } |
|
55 |
- |
|
56 |
- # find and launch the app |
|
57 |
- appDir <- |
|
58 |
- system.file("shiny-examples", theApp, package = "protGear") |
|
59 |
- if (grepl('protGear_interactive', theApp)) { |
|
60 |
- file_rmd <- 'protGear_interactive.Rmd' |
|
61 |
- } else |
|
62 |
- file_rmd <- 'index.Rmd' |
|
63 |
- rmarkdown::run(file = paste0(appDir, "/", file_rmd)) |
|
64 |
-} |
65 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,27 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "112F37F3", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/saved_functions.R", |
|
4 |
- "project_path": null, |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647626140637.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 15, |
|
12 |
- "properties": { |
|
13 |
- "tempName": "Untitled1", |
|
14 |
- "source_window_id": "", |
|
15 |
- "Source": "Source", |
|
16 |
- "cursorPosition": "62,0", |
|
17 |
- "scrollLine": "36" |
|
18 |
- }, |
|
19 |
- "folds": "", |
|
20 |
- "lastKnownWriteTime": 1640081043, |
|
21 |
- "encoding": "UTF-8", |
|
22 |
- "collab_server": "", |
|
23 |
- "source_window": "", |
|
24 |
- "last_content_update": 1640081043, |
|
25 |
- "read_only": false, |
|
26 |
- "read_only_alternatives": [] |
|
27 |
-} |
|
28 | 0 |
\ No newline at end of file |
29 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,64 +0,0 @@ |
1 |
- |
|
2 |
-#' |
|
3 |
-#' @title Merge array data with the sample ID |
|
4 |
-#' @description merge several slide datasets |
|
5 |
-#' |
|
6 |
-#' @param dfs A character vector with the names of the |
|
7 |
-#' @param filenames A vector or list of files names |
|
8 |
-#' @param data_files A list with of data files |
|
9 |
-#' @param totsamples The total number of samples per slide. defined in genepix vars |
|
10 |
-#' @param blockspersample the number of samples in a block |
|
11 |
-#' @param sampleID_path location of the sample id files defined in the genepix vars |
|
12 |
-#' @param bg A logical character indicating whether the background data is being merged or not |
|
13 |
-#' @export |
|
14 |
-#' @import purrr |
|
15 |
-#' @return A data frame of slide data merged together, for eith bg data or bg corrected data |
|
16 |
-merge_datasets <- function(dfs,filenames , data_files ,totsamples, blockspersample , |
|
17 |
- sampleID_path,bg=FALSE){ |
|
18 |
- if(bg==TRUE){ |
|
19 |
- data1_transp <- purrr::map(.x=dfs, .f=transp_bg, |
|
20 |
- totsamples=totsamples,blockspersample=blockspersample, |
|
21 |
- sampleID_path=sampleID_path,data_files=data_files ) |
|
22 |
- }else{ |
|
23 |
- data1_transp <- purrr::map(.x=dfs, .f=transp, |
|
24 |
- totsamples=totsamples,blockspersample=blockspersample, |
|
25 |
- sampleID_path=sampleID_path,data_files=data_files ) |
|
26 |
- } |
|
27 |
- |
|
28 |
- |
|
29 |
- data1_transp <- set_names(data1_transp, purrr::map(filenames, name_of_files)) |
|
30 |
- return(data1_transp) |
|
31 |
-} |
|
32 |
-#' \\\_End_function\\\ # |
|
33 |
-#' |
|
34 |
-#' |
|
35 |
- |
|
36 |
-#' Plot mean vs SD of normalised data |
|
37 |
-#' |
|
38 |
-#' @param exprs_normalised A normalised object of class data frame or matrix |
|
39 |
-#' |
|
40 |
-#' @return A ggplot of mean vs standard deviation |
|
41 |
-#' @export |
|
42 |
-#' @description A genereic function to plot \code{mean} vs \code{sd} after normalisation. |
|
43 |
-#' @import ggplot2 |
|
44 |
-#' @examples |
|
45 |
-#' matrix_antigen <- readr::read_csv(system.file("extdata", "matrix_antigen.csv", package="protGear")) |
|
46 |
-#' normlise_vsn <- matrix_normalise(as.matrix(matrix_antigen), |
|
47 |
-#' method = "vsn", |
|
48 |
-#' return_plot = FALSE |
|
49 |
-#' ) |
|
50 |
-#' plot_mean_sd(normlise_vsn) |
|
51 |
-plot_mean_sd <- function(exprs_normalised){ |
|
52 |
- |
|
53 |
- ggplot(exprs_normalised , aes(x=rankMean, y=stdev)) + |
|
54 |
- geom_jitter( color="red") + theme_classic() + |
|
55 |
- ggtitle(paste0(toupper(unique(exprs_normalised$method)))) + |
|
56 |
- xlab("Mean rank normalised") + |
|
57 |
- ylab("SD normalised") + |
|
58 |
- theme_light() |
|
59 |
-} |
|
60 |
- |
|
61 |
- |
|
62 |
- |
|
63 |
- |
|
64 |
- |
65 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,27 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "1237BDCB", |
|
3 |
- "path": null, |
|
4 |
- "project_path": null, |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": true, |
|
9 |
- "created": 1643379365839.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 16, |
|
12 |
- "properties": { |
|
13 |
- "tempName": "Untitled1", |
|
14 |
- "source_window_id": "", |
|
15 |
- "Source": "Source", |
|
16 |
- "cursorPosition": "37,0", |
|
17 |
- "scrollLine": "0" |
|
18 |
- }, |
|
19 |
- "folds": "", |
|
20 |
- "lastKnownWriteTime": 8319874934561204079, |
|
21 |
- "encoding": "", |
|
22 |
- "collab_server": "", |
|
23 |
- "source_window": "", |
|
24 |
- "last_content_update": 1643379388859, |
|
25 |
- "read_only": false, |
|
26 |
- "read_only_alternatives": [] |
|
27 |
-} |
|
28 | 0 |
\ No newline at end of file |
29 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,37 +0,0 @@ |
1 |
- |
|
2 |
-mk_test_trend <- function (x, alternative = c("two.sided", "greater", |
|
3 |
- "less"), continuity = TRUE) |
|
4 |
-{ |
|
5 |
- if (!is.numeric(x)) { |
|
6 |
- stop("'x' must be a numeric vector") |
|
7 |
- } |
|
8 |
- n <- length(x) |
|
9 |
- if (n < 3) { |
|
10 |
- stop("'x' must have at least 3 elements") |
|
11 |
- } |
|
12 |
- na.fail(x) |
|
13 |
- alternative <- match.arg(alternative) |
|
14 |
- S <- .mkScore(x) |
|
15 |
- t <- table(x) |
|
16 |
- names(t) <- NULL |
|
17 |
- varS <- .varmk(t, n) |
|
18 |
- D <- .Dfn(t, n) |
|
19 |
- tau <- S/D |
|
20 |
- if (continuity) { |
|
21 |
- sg <- sign(S) |
|
22 |
- z <- sg * (abs(S) - 1)/sqrt(varS) |
|
23 |
- } |
|
24 |
- else { |
|
25 |
- z <- S/sqrt(varS) |
|
26 |
- } |
|
27 |
- pval <- switch(alternative, two.sided = 2 * min(0.5, pnorm(abs(z), |
|
28 |
- lower.tail = FALSE)), greater = pnorm(z, lower.tail = FALSE), |
|
29 |
- less = pnorm(z, lower.tail = TRUE)) |
|
30 |
- DNAME <- deparse(substitute(x)) |
|
31 |
- ans <- list(data.name = DNAME, p.value = pval, statistic = c(z = z), |
|
32 |
- null.value = c(S = 0), parameter = c(n = n), estimates = c(S = S, |
|
33 |
- varS = varS, tau = tau), alternative = alternative, |
|
34 |
- method = "Mann-Kendall trend test", pvalg = pval) |
|
35 |
- class(ans) <- "htest" |
|
36 |
- return(ans) |
|
37 |
-} |
38 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "4AE107A5", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/R/buffer_spot_functions.R", |
|
4 |
- "project_path": "R/buffer_spot_functions.R", |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1644591069314.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 10, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "23,9", |
|
16 |
- "scrollLine": "7" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1647760747, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1647760747, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,85 +0,0 @@ |
1 |
-#' Extract buffer spots of data |
|
2 |
-#' |
|
3 |
-#' @param Data1 An object of the class data frame |
|
4 |
-#' @param buffer_spot A character string containing the name of the buffer |
|
5 |
-#' spots. |
|
6 |
-#' @import dplyr |
|
7 |
-#' @importFrom dplyr select filter |
|
8 |
-#' @description A function to extract the buffer spots data. A buffer spot only |
|
9 |
-#' has the solution for |
|
10 |
-#' proprietary ingredients for stabilizing protein and minimizing evaporation. |
|
11 |
-#' @return A data frame of the buffer control spots |
|
12 |
-#' @export |
|
13 |
-#' |
|
14 |
-#' @examples |
|
15 |
-#' bg_correct_df <- readr::read_csv(system.file("extdata", "Data1_sample.csv", |
|
16 |
-#' package="protGear")) |
|
17 |
-#' buffer_spots(Data1 = bg_correct_df) |
|
18 |
-buffer_spots <- function(Data1 , buffer_spot = "buffer") { |
|
19 |
- Data2_buffer <- Data1 %>% |
|
20 |
- # within each Name count sampleID. We had grouped this earlier |
|
21 |
- # n() - gives number of observations in the current group |
|
22 |
- mutate(replicate = 1:n()) %>% |
|
23 |
- # Select only relevant variables |
|
24 |
- dplyr:::select(sampleID, |
|
25 |
- antigen, |
|
26 |
- replicate, |
|
27 |
- FMedianBG_correct, |
|
28 |
- Block, |
|
29 |
- Column, |
|
30 |
- Row) |
|
31 |
- |
|
32 |
- if (buffer_spot == 'buffer') { |
|
33 |
- Data2_buffer <- Data2_buffer %>% |
|
34 |
- filter(grepl('^[bB][Uu][Ff][Ff][Ee][Rr]', antigen)) |
|
35 |
- } else { |
|
36 |
- Data2_buffer <- Data2_buffer %>% |
|
37 |
- filter(grepl(paste0('^', buffer_spot), tolower(antigen))) |
|
38 |
- } |
|
39 |
- |
|
40 |
- |
|
41 |
- Data2_buffer <- Data2_buffer %>% |
|
42 |
- # combine Name and replicate |
|
43 |
- unite(antigen, antigen, replicate) |
|
44 |
- return(Data2_buffer) |
|
45 |
-} |
|
46 |
- |
|
47 |
- |
|
48 |
-#' Plot the buffer values |
|
49 |
-#' |
|
50 |
-#' @param buffer_names A character string containing the name of the variable |
|
51 |
-#' with buffer spots. Default set to 'antigen'. |
|
52 |
-#' @param buffer_mfi A character string containing the name of the variable with |
|
53 |
-#' MFI value.Assuming background correction is done already. |
|
54 |
-#' Default to 'FMedianBG_correct' |
|
55 |
-#' @param slide_id A character string containing the name of the slide/array |
|
56 |
-#' identifier variable. |
|
57 |
-#' @param df A data frame to be used to plot |
|
58 |
-#' @import dplyr ggplot2 |
|
59 |
-#' @importFrom dplyr select filter |
|
60 |
-#' @importFrom gtools mixedsort |
|
61 |
-#' @importFrom ggplot2 ggplot |
|
62 |
-#' @return plot of buffer spots |
|
63 |
-#' @export |
|
64 |
-#' |
|
65 |
-#' @examples |
|
66 |
-#'buffers <- readr::read_csv(system.file("extdata", "buffers_sample2.csv", |
|
67 |
-#'package="protGear")) |
|
68 |
-#' plot_buffer(df=buffers,buffer_names = "sampleID") |
|
69 |
-plot_buffer <- function(df = buffers, |
|
70 |
- buffer_names = "antigen", |
|
71 |
- buffer_mfi = "FMedianBG_correct", |
|
72 |
- slide_id = ".id") { |
|
73 |
- x <- buffer_names |
|
74 |
- y <- buffer_mfi |
|
75 |
- df[[buffer_names]] <- factor(df[[buffer_names]] , |
|
76 |
- levels = mixedsort(unique(as.character(df[[buffer_names]])))) |
|
77 |
- |
|
78 |
- |
|
79 |
- p <- ggplot(data = df, aes_string(x = x, y = y)) + |
|
80 |
- geom_jitter(aes_string(x = x, y = y, color = slide_id)) + |
|
81 |
- geom_boxplot(aes_string(x = x, y = y), alpha = 0.2) + |
|
82 |
- theme_light() + |
|
83 |
- theme(axis.text.x = element_text(angle = 45, hjust = 1)) |
|
84 |
- return(p) |
|
85 |
-} |
86 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,24 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "4D556D40", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/runthis.R", |
|
4 |
- "project_path": null, |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "3771495863", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647626114720.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 13, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source" |
|
15 |
- }, |
|
16 |
- "folds": "", |
|
17 |
- "lastKnownWriteTime": 1598965142, |
|
18 |
- "encoding": "UTF-8", |
|
19 |
- "collab_server": "", |
|
20 |
- "source_window": "", |
|
21 |
- "last_content_update": 1598965142, |
|
22 |
- "read_only": false, |
|
23 |
- "read_only_alternatives": [] |
|
24 |
-} |
|
25 | 0 |
\ No newline at end of file |
2 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "5344A29F", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/NEWS.md", |
|
4 |
- "project_path": "NEWS.md", |
|
5 |
- "type": "markdown", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647521025022.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 8, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "7,38", |
|
16 |
- "scrollLine": "0" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1647521040, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1647521040512, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,13 +0,0 @@ |
1 |
-# Changes in version 0.99.1 (2021-12-15) |
|
2 |
- |
|
3 |
-+ Submitted to Bioconductor |
|
4 |
- |
|
5 |
-# Changes in version 0.99.543 (2022-02-14) |
|
6 |
-+ Submitted to Bioconductor for review |
|
7 |
- |
|
8 |
-# Changes in version 0.99.544 (2022-03-17) |
|
9 |
-+ Removed unnecessary files |
|
10 |
-+ Added Suggests in Description |
|
11 |
-+ Updated R requirement to 4.2 |
|
12 |
-+ Added the data documentation |
|
13 |
- |
14 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "5B090C88", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/inst/shiny-examples/protGear_interactive/app.R", |
|
4 |
- "project_path": "inst/shiny-examples/protGear_interactive/app.R", |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1644833056795.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 15, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "6,4", |
|
16 |
- "scrollLine": "0" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1619183120, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1619183120, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,54 +0,0 @@ |
1 |
-library(shiny) |
|
2 |
-library(shinyFiles) |
|
3 |
- |
|
4 |
-# Define UI for application that draws a histogram |
|
5 |
-ui <- fluidPage( |
|
6 |
- |
|
7 |
- # Application title |
|
8 |
- titlePanel("Example - Shiny Files Buttons Use"), |
|
9 |
- |
|
10 |
- # Sidebar with a slider input for number of bins |
|
11 |
- sidebarLayout( |
|
12 |
- sidebarPanel( |
|
13 |
- p("First press this ShinyDir Button to locate an existing folder and set |
|
14 |
- it to be your new working directory"), |
|
15 |
- shinyDirButton("folderChoose","Choose Folder","Choose working directory"), |
|
16 |
- |
|
17 |
- p("In this second step, press the ShinyFiles Button that should point to the |
|
18 |
- volumes (Just as the previous button does). In my R version, this is where it |
|
19 |
- renders a white interface (see attached picture"), |
|
20 |
- shinyFilesButton("filesChoose1","Files Chooser 1","Choose your files", |
|
21 |
- multiple=TRUE), |
|
22 |
- |
|
23 |
- p("In this third button, pI owuld like to link the ShinyFiles interface with |
|
24 |
- the selected folder in button 1. This part I have not been able to solve"), |
|
25 |
- shinyFilesButton("filesChoose2","Files Chooser 2","Choose your files", |
|
26 |
- multiple=TRUE) |
|
27 |
- |
|
28 |
- ), |
|
29 |
- |
|
30 |
- # Show a plot of the generated distribution |
|
31 |
- mainPanel( |
|
32 |
- verbatimTextOutput("path") |
|
33 |
- ) |
|
34 |
- ) |
|
35 |
- ) |
|
36 |
- |
|
37 |
-# Define server logic required to draw a histogram |
|
38 |
-server <- function(input, output, session) { |
|
39 |
- |
|
40 |
- volumes <- getVolumes() |
|
41 |
- shinyDirChoose(input, "folderChoose", roots = volumes, session = session) |
|
42 |
- sel_path <- reactive({return(print(parseDirPath(volumes, input$folderChoose)))}) |
|
43 |
- shinyFileChoose(input, "filesChoose1", roots = volumes, session = session) |
|
44 |
- |
|
45 |
- |
|
46 |
- setWorkingDir<-eventReactive(input$folderChoose,{ |
|
47 |
- setwd(sel_path()) |
|
48 |
- }) |
|
49 |
- |
|
50 |
- output$path<-renderText(sel_path()) |
|
51 |
-} |
|
52 |
- |
|
53 |
-# Run the application |
|
54 |
-shinyApp(ui = ui, server = server) |
55 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,27 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "68D04000", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/vignettes/vignette.Rmd", |
|
4 |
- "project_path": "vignettes/vignette.Rmd", |
|
5 |
- "type": "r_markdown", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": true, |
|
9 |
- "created": 1647516170123.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 7, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "258,0", |
|
16 |
- "scrollLine": "0", |
|
17 |
- "last_setup_crc32": "" |
|
18 |
- }, |
|
19 |
- "folds": "", |
|
20 |
- "lastKnownWriteTime": 1647443184, |
|
21 |
- "encoding": "UTF-8", |
|
22 |
- "collab_server": "", |
|
23 |
- "source_window": "", |
|
24 |
- "last_content_update": 1647516586911, |
|
25 |
- "read_only": false, |
|
26 |
- "read_only_alternatives": [] |
|
27 |
-} |
|
28 | 0 |
\ No newline at end of file |
29 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,629 +0,0 @@ |
1 |
-title: protGear vignette |
|
2 |
- processing suite |
|
3 |
-author: "Kennedy Mwai" |
|
4 |
-header-includes: |
|
5 |
- - \usepackage[T1]{fontenc} |
|
6 |
-date: '`r format(Sys.time(), "%d %B, %Y")`' |
|
7 |
-package: protGear |
|
8 |
-vignette: > |
|
9 |
- %\VignetteIndexEntry{protGear} |
|
10 |
- %\VignetteEncoding{UTF-8} |
|
11 |
- %\VignetteEngine{knitr::rmarkdown} |
|
12 |
-link-citations: true |
|
13 |
-output: |
|
14 |
- html_document: |
|
15 |
- df_print: paged |
|
16 |
- toc: true |
|
17 |
- toc_float: |
|
18 |
- collapsed: false |
|
19 |
- number_sections: false |
|
20 |
- theme: spacelab |
|
21 |
- highlight: tango |
|
22 |
- pdf_document: default |
|
23 |
- |
|
24 |
- |
|
25 |
- |
|
26 |
-```{r setup, message=FALSE, warning=FALSE} |
|
27 |
-library(ggpubr) |
|
28 |
-library(gtools) |
|
29 |
-library(purrr) |
|
30 |
-library(scales) |
|
31 |
-library(pheatmap) |
|
32 |
-library(data.table) |
|
33 |
-library(kableExtra) |
|
34 |
-library(gridExtra) |
|
35 |
-library(png) |
|
36 |
-library(knitr) |
|
37 |
-library(grid) |
|
38 |
-library(styler) |
|
39 |
-library(FactoMineR) |
|
40 |
-library(factoextra) |
|
41 |
-library(magick) |
|
42 |
-library(rlang) |
|
43 |
-library(GGally) |
|
44 |
-library(ggplotify) |
|
45 |
-library(remotes) |
|
46 |
-library(dplyr) |
|
47 |
-library(tidyr) |
|
48 |
-knitr::opts_chunk$set(echo = TRUE, message=FALSE,warning = FALSE, |
|
49 |
- fig.align = 'center', |
|
50 |
- dev = "png", |
|
51 |
- tidy='styler', tidy.opts=list(strict=TRUE)) |
|
52 |
- |
|
53 |
- |
|
54 |
-``` |
|
55 |
- |
|
56 |
- |
|
57 |
-```{css ,echo=FALSE} |
|
58 |
-.custom-inline { |
|
59 |
- color: red; |
|
60 |
- font-weight: 700 |
|
61 |
-} |
|
62 |
-``` |
|
63 |
- |
|
64 |
-## Introduction |
|
65 |
- |
|
66 |
-### General information |
|
67 |
-protGear is a package for protein micro data processing just before the main analysis. The package loads the '`gpr`' or '`txt`' file format extracted by the quantification software and merges this with the specific sample identifiers. The package processes multiple files extracted batch by batch with their corresponding sample identifier file. The sample identifier file has 2 variables '`v1`' and '`v2`' which indicate the mini-array or block number and sample identifier respectively. The '`gpr`' file and the corresponding sample identifier file have the same file name. protGear also provides a web based $Shiny^{(R)}$ platform for real time visualization of the data processing. |
|
68 |
- |
|
69 |
-In this vignette the general work-flow of protGear will be outlined by processing a sample dataset from a multicentre study __Plasmodium falciparum__ Merozoite Protein Microarray. The multicentre study design motivated the development of the protGear suite. |
|
70 |
- |
|
71 |
-The details of the methodologies are published here https://doi.org/10.1016/j.csbj.2021.04.044 |
|
72 |
- |
|
73 |
-### Analysis setup |
|
74 |
-Create 2 folders that hold the '`.gpr`' files and the corresponding sample identifier files. |
|
75 |
- |
|
76 |
-{width=70%} |
|
77 |
- |
|
78 |
- |
|
79 |
- |
|
80 |
-## Sample identifier file |
|
81 |
- |
|
82 |
- |
|
83 |
- |
|
84 |
-### Installation |
|
85 |
- |
|
86 |
-To install `protGear` from BioConductor the following commands in R |
|
87 |
- |
|
88 |
-```{r install_package, eval=FALSE} |
|
89 |
-## install from BioConductor |
|
90 |
-if (!require("BiocManager", quietly = TRUE)) |
|
91 |
- install.packages("BiocManager") |
|
92 |
-BiocManager::install('protGear') |
|
93 |
-``` |
|
94 |
- |
|
95 |
- |
|
96 |
-## Importing data |
|
97 |
- |
|
98 |
-```{r call_protGear} |
|
99 |
-## load the package |
|
100 |
-library(protGear) |
|
101 |
-``` |
|
102 |
- |
|
103 |
-The first part is to specify the parameters of the micro-array experiment to assist in processing the data. The parameters specified are |
|
104 |
- |
|
105 |
- - channel - The scanner fluorescence output used to record the data. It can be green,red,blue among others with a specified number. |
|
106 |
- - chip_path - the folder where the sub folders of 'gpr' or 'txt' files are stored. This path contains sub folders with the array data, for example the sub folders for the different batches. |
|
107 |
- - totsamples - the number of samples in a slide or array. |
|
108 |
- - blockspersample - The number of blocks a sample it takes. In this example each sample occupies 2 blocks each with 384 spots. |
|
109 |
- - sampleID_path - the folder where the sample identifiers files are stored |
|
110 |
- - machine - The indicator for which machine was used to hybridize the samples if the experiment had more than one machine. |
|
111 |
- - date_process -the date of sample processing |
|
112 |
- |
|
113 |
-The parameters "`chip_path`", "`channel`" , "`totsamples`" and "`sampleID_path`" are mandatory. |
|
114 |
- |
|
115 |
-```{r array_params} |
|
116 |
-## specify the the parameters to process the data |
|
117 |
-genepix_vars <- array_vars(channel="635" , |
|
118 |
- chip_path = system.file("extdata/array_data/", package="protGear"), |
|
119 |
- totsamples = 21, |
|
120 |
- blockspersample = 2, |
|
121 |
- sampleID_path = system.file("extdata/array_sampleID/", package="protGear"), |
|
122 |
- mig_prefix = "_first", |
|
123 |
- machine =1, |
|
124 |
- ## optional |
|
125 |
- date_process = "0520") |
|
126 |
-``` |
|
127 |
- |
|
128 |
- |
|
129 |
-The exact channel used should be checked in the header of the file from the quantification software under `Wavelengths`. |
|
130 |
- |
|
131 |
-```{r gpr_header} |
|
132 |
-header_gpr <- readLines(system.file("extdata/array_data/machine1/KK2-06.txt", package="protGear"), |
|
133 |
- n=40) |
|
134 |
-header_gpr <- gsub("\"", "", header_gpr[1:32]) |
|
135 |
-header_gpr[1:32] |
|
136 |
-``` |
|
137 |
- |
|
138 |
- |
|
139 |
-The function `r text_spec("check_sampleID_files()",color="red")` helps to check whether each micro array file has a corresponding sample identifier file. The sample identifier files are generated from the lab plate maps to match the corresponding samples on a specific slide.If the sample identifier file is missing, protGear automatically generates the id's. |
|
140 |
- |
|
141 |
-### Spatial structure of slide |
|
142 |
- |
|
143 |
-protGear offers a functionality to inspect the slide visually for any strong spatial biases when the scan image is not available. However, we recommend using the scanned image to visualize the spatial artefacts that might not be recorded in the `.gpr` file. We include the functions `r text_spec("visualize_slide()",color="red")` and `r text_spec("visualize_slide_2d()",color="red")` to check the spatial structure. The functions are build on `r text_spec("structure_plot()",color="red")` which shows the block and mini-array structure of a slide. |
|
144 |
- |
|
145 |
- |
|
146 |
- |
|
147 |
-#### Visualize the foreground MFI |
|
148 |
- |
|
149 |
-Here we visualize the foreground MFI using the `r text_spec("visualize_slide",color="red")` function |
|
150 |
- |
|
151 |
-```{r slide_struct, fig.align='left'} |
|
152 |
-visualize_slide(infile=system.file("extdata/array_data/machine1/KK2-06.txt", package="protGear"), |
|
153 |
- MFI_var ='F635 Median' ) |
|
154 |
- |
|
155 |
-``` |
|
156 |
- |
|
157 |
-#### Visualize the background MFI |
|
158 |
- |
|
159 |
-Here we visualize the background MFI using the `visualize_slide_2d` function |
|
160 |
- |
|
161 |
-```{r slide_struct_2d, fig.align='left'} |
|
162 |
-visualize_slide_2d(infile =system.file("extdata/array_data/machine1/KK2-06.txt", package="protGear"), |
|
163 |
- MFI_var ='B635 Median' ) |
|
164 |
-``` |
|
165 |
- |
|
166 |
- |
|
167 |
-### Import .gpr/txt data |
|
168 |
- |
|
169 |
-Microarray data is imported using the `r text_spec("read_array_files()",color="red")` function. The function accepts the following mandatory arguments; |
|
170 |
- |
|
171 |
- - `filename` - the name of the file which the data are to be read from. In this example a list of multiple files from a folder is used and passed to `r text_spec("read_array_files()","red")` using `r text_spec("purrr",color="red")`. |
|
172 |
- - `data_path` - The path where the file with the data is located |
|
173 |
- - `genepix_vars` - A list of specific definitions of the experiment design. See `r text_spec("array_vars()",color="red")` |
|
174 |
- |
|
175 |
-For this example I use the sub-folder 1 specified using `genepix_vars$paths[[1]]` which is this path under vignette folder `r genepix_vars$paths[[1]]`. |
|
176 |
- |
|
177 |
-```{r array_files} |
|
178 |
-#### read in all the datasets |
|
179 |
-### list all the file names under data folder |
|
180 |
-filenames <- list.files(file.path(genepix_vars$paths[[1]]), |
|
181 |
- pattern="*.txt$|*.gpr$", full.names=FALSE) |
|
182 |
-#' @___________________read_in_the_files_with_the_text_data_from_the_chip_____________________________ |
|
183 |
-### read all the data files and save them in a list |
|
184 |
-data_path <- paste0(genepix_vars$paths[[1]],"/") |
|
185 |
-data_files <- purrr::map(.x = filenames, |
|
186 |
- .f = read_array_files, |
|
187 |
- data_path=data_path , |
|
188 |
- genepix_vars=genepix_vars) |
|
189 |
-data_files <- set_names(data_files, purrr::map(filenames, name_of_files)) |
|
190 |
-``` |
|
191 |
- |
|
192 |
- |
|
193 |
-## Background Correction |
|
194 |
- |
|
195 |
-Background noise is caused by non-specific fluorescence such as auto-fluorescence of the glass slide or non-specific binding of parameters and reference substances. To cut down the effect of background noise we have included different approaches for background correction. First, we extract the background values, visualize and select the best background approach. We have implemented five different approaches; |
|
196 |
- |
|
197 |
- 1) Local background subtraction |
|
198 |
- 2) Global background subtraction |
|
199 |
- 3) Moving minimum background subtraction |
|
200 |
- 4) Normal and exponential model (normexp) |
|
201 |
- 5) Log-linear background correction (Edwards) |
|
202 |
- |
|
203 |
-In '`.gpr`' files the Background column starts with a '`B`' followed by the wavelength or channel. In order to perform background correction, we extract the background mean fluorescent intensities (MFI's) using the function `r text_spec("extract_bg()",color="red")` . The function accepts the arguments `iden` which is the file identifier, `data_files` a list of data objects with names utilised by `iden` and `genepix_vars` defined using `r text_spec("array_vars()",color="red")` function. We utilise the `purr::map` function to extract the background MFI of multiple data files. |
|
204 |
- |
|
205 |
-```{r bg_data} |
|
206 |
-## utilising the map package we process a number of files under data_files list |
|
207 |
-dfs <- names(data_files) |
|
208 |
-allData_bg <- purrr::map(.x=dfs, .f=extract_bg,data_files=data_files,genepix_vars) |
|
209 |
-allData_bg <- set_names(allData_bg, purrr::map(filenames, name_of_files)) |
|
210 |
-allData_bg <- plyr::ldply(allData_bg) |
|
211 |
-``` |
|
212 |
- |
|
213 |
-### Foreground vs Background |
|
214 |
- |
|
215 |
-Before selecting the best background correction approach the MFI signals are be inspected visually. In protGear we first utilise the function `r text_spec("plot_FB()",color="red")` that graphs the background, _BG_Median_ and foreground values, _FBG_Median_. On the protGear Shiny platform the visuals are interactive and you can identify the features or blocks with strong bias. |
|
216 |
- |
|
217 |
-```{r bg_vs_fg} |
|
218 |
-p1 <- plot_FB(allData_bg,antigen_name="antigen", |
|
219 |
- bg_MFI="BG_Median",FG_MFI="FBG_Median",log=FALSE) |
|
220 |
- |
|
221 |
- |
|
222 |
-p1 |
|
223 |
- |
|
224 |
-``` |
|
225 |
- |
|
226 |
-### Background MFI by blocks |
|
227 |
- |
|
228 |
-```{r bg_plot} |
|
229 |
-p2 <- plot_bg(df=allData_bg, x_axis="Block",bg_MFI="BG_Median", |
|
230 |
- log_mfi=TRUE) |
|
231 |
-p2 |
|
232 |
-``` |
|
233 |
- |
|
234 |
-### Background correction |
|
235 |
- |
|
236 |
-After background visualization and selecting the best approach the array data are merged with their specific sample identifier files. |
|
237 |
- |
|
238 |
-_`r text_spec("Note:",color="red")`_ Each array file must have its own corresponding sample identifier `.csv` file stored in `r text_spec("array_vars()",color="red")` function under `sampleID_path`. Check General information section. |
|
239 |
- |
|
240 |
- |
|
241 |
-The method of background subtraction selected is specified under `r text_spec("method",color="red")` below. The background correction is performed by `r text_spec("bg_correct()",color="red")` function. |
|
242 |
- |
|
243 |
- |
|
244 |
- |
|
245 |
-```{r bg_correct} |
|
246 |
-sample_ID_merged_dfs <- purrr::map(.x=dfs, .f=merge_sampleID ,data_files=data_files , |
|
247 |
- genepix_vars, method="subtract_local") |
|
248 |
-sample_ID_merged_dfs <- set_names(sample_ID_merged_dfs, purrr::map(filenames, name_of_files)) |
|
249 |
-``` |
|
250 |
- |
|
251 |
- |
|
252 |
-## Buffer spots |
|
253 |
- |
|
254 |
-Buffer spots are specific to the experiment design and are not always included. Buffer spots are used to check for unexpected scanning artefacts. The buffer spots should have similar values in different slides. Some outliers are expected for buffer spots close sample spots or landmark. However you can specify the name of your control antigens here `r text_spec('buffer_spot="buffer"',color="blue")` if you do not have buffer spots. |
|
255 |
- |
|
256 |
-```{r buffer_spots_data} |
|
257 |
-buffer_transp <- purrr::map(.x=sample_ID_merged_dfs, .f=buffer_spots , buffer_spot="buffer") |
|
258 |
- |
|
259 |
-buffer_transp <- set_names(buffer_transp, purrr::map(filenames, name_of_files)) |
|
260 |
- |
|
261 |
-buffers <- plyr::ldply(buffer_transp) |
|
262 |
-plot_buffer(buffers,buffer_names="antigen",buffer_mfi="FMedianBG_correct",slide_id=".id") |
|
263 |
- |
|
264 |
-``` |
|
265 |
- |
|
266 |
-## Coefficient of Variation (CV) |
|
267 |
- |
|
268 |
-To calculate the CV's we utilise the `r text_spec("cv_estimation()",color="red")` function with a `cv_cut_off` specified , sample identifier variable and antigen specified under `sampleID_var` and `antigen` respectively. The `replicate_var` and `mfi_var` identifies the variable with the replicate rank generated and MFI's values. |
|
269 |
- |
|
270 |
- |
|
271 |
-```{r replicates_plot} |
|
272 |
-#' @________________________________calculated_cv_for_each_data_file_______________________________________ |
|
273 |
-#' data without the selected mean for the best 2 CVs |
|
274 |
-dataCV <- purrr::map(.x=sample_ID_merged_dfs, .f=cv_estimation ,lab_replicates=3 , |
|
275 |
- cv_cut_off=20, |
|
276 |
- sampleID_var='sampleID', antigen_var='antigen' ,replicate_var='replicate' , |
|
277 |
- mfi_var='FMedianBG_correct') |
|
278 |
- |
|
279 |
-lab_replicates=3 |
|
280 |
-dataCV <- set_names(dataCV, purrr::map(filenames, name_of_files)) |
|
281 |
- |
|
282 |
-aa <- plyr::ldply(dataCV) |
|
283 |
-GGally::ggpairs(aa,aes(color=cvCat_all) , |
|
284 |
- columns = paste(seq_len(lab_replicates)), title = "", axisLabels = "show") + |
|
285 |
- theme_light() |
|
286 |
- |
|
287 |
-``` |
|
288 |
- |
|
289 |
-### Summary of CV values |
|
290 |
- |
|
291 |
-Here we create a summary of the CV values for each sample/slide utilising the `r text_spec("cv_by_sample_estimation()",color="red")` function. This helps to identify samples with high values of CV. On the protGear dashboard an interactive table is created to show the summary for each sample. |
|
292 |
- |
|
293 |
-```{r cv_summary} |
|
294 |
- |
|
295 |
- |
|
296 |
-#' @________________________________summary_of_cv_for_each_sample________________________________________ |
|
297 |
-#' creates summaries by cv's greater than 20 and less than 20 |
|
298 |
- |
|
299 |
-cv_cut_off <- 20 |
|
300 |
-dataCV_sample <- purrr::map(.x=dataCV, .f=protGear::cv_by_sample_estimation , cv_variable="cvCat_all", |
|
301 |
- lab_replicates=3) |
|
302 |
-dataCV_sample <- set_names(dataCV_sample, purrr::map(filenames, name_of_files)) |
|
303 |
-all_cv_sample <- plyr::ldply(dataCV_sample) |
|
304 |
- |
|
305 |
-``` |
|
306 |
- |
|
307 |
- |
|
308 |
-```{r cv_by_sample} |
|
309 |
-less_20 <- rlang::sym(paste0("CV <= ",cv_cut_off, "_perc")) |
|
310 |
-gt_20 <- rlang::sym(paste0("CV > ",cv_cut_off, "_perc")) |
|
311 |
- |
|
312 |
-less_20_per <- rlang::sym(paste0("% CV <=",cv_cut_off)) |
|
313 |
-gt_20_per <- rlang::sym(paste0("% CV >",cv_cut_off)) |
|
314 |
-ggplot(all_cv_sample)+ |
|
315 |
- geom_violin(aes(.id,`CV <= 20_perc`, color="% CV =<20")) + |
|
316 |
- geom_violin(aes(.id,`CV > 20_perc`, color="% CV >20")) + |
|
317 |
- geom_violin(aes(.id,Others_perc,color="Others")) + |
|
318 |
- ylab("% of CV") + |
|
319 |
- theme_minimal() + |
|
320 |
- ggtitle("% of CV >20 or <=20 for each slide all repeats considered") |
|
321 |
-``` |
|
322 |
- |
|
323 |
- |
|
324 |
-### Best replicates |
|
325 |
- |
|
326 |
-We have implemented a function `r text_spec("best_CV_estimation()",color="red")` to select the best replicates if an experiment has more than 2 technical replicates. This helps to select the less variable replicates. |
|
327 |
- |
|
328 |
-```{r best_reps} |
|
329 |
-#' @________________________________data_with_selected_best_2_CV_______________________________________ |
|
330 |
-#' data with the selected mean for the best 2 CVs |
|
331 |
-dataCV_best2 <- purrr::map(.x=dataCV, .f=best_CV_estimation , slide_id="iden",lab_replicates=3, |
|
332 |
- cv_cut_off=20) |
|
333 |
- |
|
334 |
-## give the names to the returned list |
|
335 |
-dataCV_best2 <- set_names(dataCV_best2, purrr::map(filenames, name_of_files)) |
|
336 |
- |
|
337 |
- |
|
338 |
-dataCV_sample_best2 <- purrr::map(.x=dataCV_best2, .f=cv_by_sample_estimation , |
|
339 |
- cv_variable="best_CV_cat",lab_replicates=3) |
|
340 |
-dataCV_sample_best2 <- set_names(dataCV_sample_best2, purrr::map(filenames, name_of_files)) |
|
341 |
-all_cv_sample_best2 <- plyr::ldply(dataCV_sample_best2) |
|
342 |
-``` |
|
343 |
- |
|
344 |
- |
|
345 |
-On the violin below we observe that with selecting the less variable replicates , the percentage of the "good CV" values increases. |
|
346 |
- |
|
347 |
-```{r best_cv_plot} |
|
348 |
-## plot only the CV perccentages |
|
349 |
-ggplot(all_cv_sample_best2)+ |
|
350 |
- geom_violin(aes(.id,`CV <= 20_perc`, color="% CV =<20")) + |
|
351 |
- geom_violin(aes(.id,`CV > 20_perc`, color="% CV >20")) + |
|
352 |
- geom_violin(aes(.id,Others_perc,color="Others")) + |
|
353 |
- ylab("% of CV") + |
|
354 |
- theme_minimal() + |
|
355 |
- ggtitle("% of CV >20 or <=20 for each slide") |
|
356 |
-``` |
|
357 |
- |
|
358 |
- |
|
359 |
-## Tag subtraction |
|
360 |
- |
|
361 |
-Tag subtraction is applied for antigens containing purification tags. A file with the specific TAG name for each antigen is loaded. The file has the `antigen,TAG and TAG_name` and the TAG_name must be part of the antigens listed. |
|
362 |
- |
|
363 |
-```{r tag_file} |
|
364 |
-tag_file <- read.csv(system.file("extdata/TAG_antigens.csv", package="protGear")) |
|
365 |
-tag_antigens <- c("CD4TAG" , "GST", "MBP") |
|
366 |
-batch_vars <- list(machine="m1", day="0520") |
|
367 |
-``` |
|
368 |
- |
|
369 |
- |
|
370 |
-### Overview of the TAG file |
|
371 |
- |
|
372 |
-```{r tag_glimpse} |
|
373 |
-tb1 <- data.frame(head(tag_file, n=10)) |
|
374 |
-tb1 %>% |
|
375 |
- kable() %>% |
|
376 |
- kable_styling() |
|
377 |
-``` |
|
378 |
- |
|
379 |
- |
|
380 |
-### Subtracting the TAG values |
|
381 |
- |
|
382 |
-The function `r text_spec("tag_subtract()",color="red")` implements the TAG subtration by matching the TAG names with the corresponding TAG values. |
|
383 |
- |
|
384 |
-```{r tag_subtract} |
|
385 |
-#' @________________________________subtract_the_tag_values_______________________________________ |
|
386 |
-#' |
|
387 |
-## tag subtract |
|
388 |
-## read in the KILCHip TAG file to substract GST-1, MBP -2 and CD4TAG - 0 file |
|
389 |
-dataCV_tag <- purrr::map(.x=dataCV_best2, .f=tag_subtract , |
|
390 |
- tag_antigens=tag_antigens, mean_best_CV_var="mean_best_CV",tag_file=tag_file, |
|
391 |
- antigen_var='antigen', |
|
392 |
- batch_vars=batch_vars) |
|
393 |
-dataCV_tag <- set_names(dataCV_tag, purrr::map(filenames, name_of_files)) |
|
394 |
-dataCV_tag <- plyr::ldply(dataCV_tag) |
|
395 |
-``` |
|
396 |
- |
|
397 |
- |
|
398 |
-In this example here we plot a graph of antigens Tagged with GST before and after TAG subtraction. |
|
399 |
- |
|
400 |
-```{r before_after_tag} |
|
401 |
-aaa <- dataCV_tag %>% |
|
402 |
- filter(TAG_name=="GST") |
|
403 |
- |
|
404 |
-aaa <- aaa %>% |
|
405 |
-dplyr::select(.id,sampleID,antigen,mean_best_CV,mean_best_CV_tag) |
|
406 |
- |
|
407 |
-aaa <- aaa %>% |
|
408 |
- gather(measure,mfi,-c(.id:antigen)) |
|
409 |
- |
|
410 |
-ggplot(aaa,aes(as.factor(antigen),mfi,color=measure)) + |
|
411 |
- geom_boxplot(aes(fill=measure),alpha=0.5)+ |
|
412 |
- theme_light() + |
|
413 |
- xlab("antigen name")+ |
|
414 |
- ggtitle("Before and after TAG subtraction") + |
|
415 |
- theme(axis.text.x = element_text(angle = 45, hjust = 1)) |
|
416 |
-``` |
|
417 |
- |
|
418 |
- |
|
419 |
-## Normalisation |
|
420 |
- |
|
421 |
-To normalise the data, we create a matrix `matrix_antigen` with all the data combined. We ensure the slide identifier is included as row names of the matrix or we have a way to join them after normalisation. The `array_matrix` matrix is used to hold these parameters. In place of `AMA1` in the example below you use one of your features or antigen. We have implemented four different normalisation techniques; |
|
422 |
- |
|
423 |
-1) $Log_2$ normalisation |
|
424 |
-2) Loess normalisation |
|
425 |
-3) RLM normalisation |
|
426 |
-4) VSN normalisation. |
|
427 |
- |
|
428 |
- |
|
429 |
-```{r normalisation} |
|
430 |
-df_to_normalise <- dataCV_tag %>% ungroup() %>% |
|
431 |
- dplyr::select(slide=.id,sampleID,sample_array_ID,antigen,mean_best_CV) %>% |
|
432 |
- group_by(sampleID, slide) |
|
433 |
-df_to_normalise$sample_index <- group_indices(.data =df_to_normalise ) |
|
434 |
- |
|
435 |
-### |
|
436 |
-to_normalise <- df_to_normalise %>% |
|
437 |
- ungroup() %>% dplyr::select(-slide,-sampleID,-sample_array_ID) %>% |
|
438 |
- dplyr::select(antigen, sample_index, everything()) %>% |
|
439 |
- gather(variable, value, -(antigen:sample_index)) %>% |
|
440 |
- unite(temp, antigen ) %>% dplyr::select(-variable) %>% |
|
441 |
- spread(temp, value) %>% |
|
442 |
- as.data.frame(.) |
|
443 |
- |
|
444 |
-### get the row names of the machine data |
|
445 |
-row.names(to_normalise) <- to_normalise$sample_index |
|
446 |
-#batch_all <- as.factor(paste0(to_normalise$machine,"/",to_normalise$day)) |
|
447 |
-#machines <- as.factor(to_normalise$machine) |
|
448 |
-#day_batches <- as.factor(to_normalise$day) |
|
449 |
- |
|
450 |
-## create the matrix to normalise |
|
451 |
-matrix_antigen <- to_normalise %>% |
|
452 |
- dplyr::select(-sample_index) %>% |
|
453 |
- as.matrix(.) |
|
454 |
- |
|
455 |
- |
|
456 |
-## create the matrix to hold the important parameters |
|
457 |
-## in place of AMA1 you use one of your features or antigen |
|
458 |
-array_matrix <- df_to_normalise %>% |
|
459 |
- filter(antigen=="AMA1") %>% |
|
460 |
- ungroup() %>% |
|
461 |
- dplyr::select(sample_array_ID,sample_index,slide) |
|
462 |
- |
|
463 |
-control_antigens <- c("CommercialHumanIgG","CD4TAG") |
|
464 |
- |
|
465 |
-``` |
|
466 |
- |
|
467 |
- |
|
468 |
-The `r text_spec("matrix_normalise()",color="red")` function is used to normalise the data and returns a list of plots and a matrix of normalised values. A plot is returned if ` return_plot = TRUE` otherwise only a matrix of normalised values will be returned. |
|
469 |
- |
|
470 |
-```{r normalised_df} |
|
471 |
-normlise_df <- matrix_normalise(matrix_antigen, method = "vsn", array_matrix=array_matrix, |
|
472 |
- return_plot = TRUE,control_antigens=control_antigens) |
|
473 |
- |
|
474 |
-normlise_df$plot_normalisation |
|
475 |
- |
|
476 |
-``` |
|
477 |
- |
|
478 |
- |
|
479 |
-### Compare normalisation methods |
|
480 |
- |
|
481 |
-On the dashboard we compare the different normalisation techniques using the `r text_spec("mutiple_plot()",color="red")` function after loading the data. |
|
482 |
- |
|
483 |
- |
|
484 |
-```{r compare_methods} |
|
485 |
-control_antigens <- c("CommercialHumanIgG","CD4TAG") |
|
486 |
-## no normalisation |
|
487 |
-normalise_list_none <- matrix_normalise(matrix_antigen=matrix_antigen, |
|
488 |
- method = "none", |
|
489 |
- array_matrix=array_matrix, |
|
490 |
- return_plot = TRUE, |
|
491 |
- control_antigens=control_antigens) |
|
492 |
- names(normalise_list_none) <- c("matrix_antigen_none" ,"plot_none") |
|
493 |
-## log2 normalisation |
|
494 |
- normalise_list_log <- matrix_normalise(matrix_antigen=matrix_antigen, |
|
495 |
- method = "log2", |
|
496 |
- array_matrix=array_matrix, |
|
497 |
- return_plot = TRUE, |
|
498 |
- control_antigens=control_antigens) |
|
499 |
- names(normalise_list_log) <- c("matrix_antigen_log" ,"plot_log") |
|
500 |
-## vsn normalisation |
|
501 |
- normalise_list_vsn <- matrix_normalise(matrix_antigen=matrix_antigen, |
|
502 |
- method = "vsn", |
|
503 |
- array_matrix=array_matrix, |
|
504 |
- return_plot = TRUE, |
|
505 |
- control_antigens=control_antigens) |
|
506 |
- names(normalise_list_vsn) <- c("matrix_antigen_vsn" ,"plot_vsn") |
|
507 |
- ## cyclic loess with log |
|
508 |
- normalise_list_cyclic_loess_log <- matrix_normalise(matrix_antigen=matrix_antigen, |
|
509 |
- method = "cyclic_loess_log", |
|
510 |
- array_matrix=array_matrix, |
|
511 |
- return_plot = TRUE, |
|
512 |
- control_antigens=control_antigens) |
|
513 |
- names(normalise_list_cyclic_loess_log) <- c("matrix_antigen_cyclic_loess_log" , |
|
514 |
- "plot_cyclic_loess_log") |
|
515 |
- |
|
516 |
- |
|
517 |
- normalise_list_rlm <- matrix_normalise(matrix_antigen=matrix_antigen, |
|
518 |
- method = "rlm", |
|
519 |
- array_matrix=array_matrix, |
|
520 |
- return_plot = TRUE, |
|
521 |
- control_antigens=control_antigens) |
|
522 |
- names(normalise_list_rlm) <- c("matrix_antigen_rlm" ,"plot_rlm") |
|
523 |
- |
|
524 |
- |
|
525 |
- ## create a list after normalisation |
|
526 |
- normalised_list <- c(normalise_list_none , |
|
527 |
- normalise_list_log, |
|
528 |
- normalise_list_vsn, |
|
529 |
- normalise_list_cyclic_loess_log, |
|
530 |
- normalise_list_rlm) |
|
531 |
- ## |
|
532 |
- normalised_list_plot <- normalised_list[grepl("plot",names(normalised_list))] |
|
533 |
- |
|
534 |
- |
|
535 |
-``` |
|
536 |
- |
|
537 |
-```{r plot_comparison, fig.align='center', fig.width=12, fig.height=15} |
|
538 |
-p <- do.call("grid.arrange", c(normalised_list_plot, ncol=2)) |
|
539 |
-``` |
|
540 |
- |
|
541 |
- |
|
542 |
-## Heatmaps |
|
543 |
- |
|
544 |
-```{r heat_norm} |
|
545 |
-norm_df <- normlise_df$matrix_antigen_normalised |
|
546 |
-norm_df <- norm_df %>% |
|
547 |
- dplyr::select(-control_antigens) |
|
548 |
-p3 <- pheatmap::pheatmap(norm_df ,scale = "none", cluster_rows = FALSE, |
|
549 |
- main=paste('VSN',"Normalised Data"), |
|
550 |
- silent = TRUE) |
|
551 |
-#------- |
|
552 |
-## if you want to save the file |
|
553 |
-# p3 <- ggplotify::as.ggplot(p3) |
|
554 |
-# p <- p3 + theme_void() |
|
555 |
-# ggsave(p , |
|
556 |
-# filename ="heatmap.PNG" , |
|
557 |
-# width = 16 , height = 12 , |
|
558 |
-# limitsize = FALSE, |
|
559 |
-# dpi=200 ) |
|
560 |
-#------- |
|
561 |
-p3 |
|
562 |
-``` |
|
563 |
- |
|
564 |
- |
|
565 |
- |
|
566 |
-## PCA analysis |
|
567 |
- |
|
568 |
-```{r pca, fig.align='center',fig.width=16,fig.height=12} |
|
569 |
-norm_df <- normlise_df$matrix_antigen_normalised |
|
570 |
-res_pca <- prcomp( norm_df, scale = TRUE) |
|
571 |
-var <- get_pca_var(res_pca) |
|
572 |
-vars_visualise=20 |
|
573 |
-#Visualize the PCA |
|
574 |
-## individuals contributing to the PCA |
|
575 |
-p1 <- fviz_pca_ind(res_pca, |
|
576 |
- col.var = "contrib", # Color by contributions to the PC |
|
577 |
- gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), |
|
578 |
- repel = TRUE # Avoid text overlapping |
|
579 |
- )+ theme_minimal() |
|
580 |
- |
|
581 |
-# Select the top vars_visualise contributing variables |
|
582 |
-p2 <-fviz_pca_biplot(res_pca, label="var", |
|
583 |
- select.var = list(contrib = vars_visualise)) + |
|
584 |
- theme_minimal() |
|
585 |
- |
|
586 |
- |
|
587 |
-# Total cos2 of variables on Dim.1 and Dim.2 |
|
588 |
-p3 <- fviz_cos2(res_pca, choice = "var", axes = 1:2 , top = vars_visualise) |
|
589 |
- |
|
590 |
- |
|
591 |
-# Color by cos2 values: quality on the factor map |
|
592 |
-p4 <- fviz_pca_var(res_pca, col.var = "cos2", |
|
593 |
- gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), |
|
594 |
- select.var = list(contrib = vars_visualise), |
|
595 |
- repel = TRUE # Avoid text overlapping |
|
596 |
- ) |
|
597 |
- |
|
598 |
- |
|
599 |
-## combine the plots into one grid |
|
600 |
-## combine the plots into one grid |
|
601 |
-## combine the plots into one grid |
|
602 |
-p_pca <- gridExtra::grid.arrange(p1,p2,p3,p4, ncol=2 ) |
|
603 |
-## If you want to save the file |
|
604 |
-# ggsave(p_pca , |
|
605 |
-# filename ="p_pca.PNG" , |
|
606 |
-# width = 16 , height = 12 , |
|
607 |
-# units = "in", |
|
608 |
-# limitsize = FALSE, |
|
609 |
-# dpi=300) |
|
610 |
-p_pca |
|
611 |
-``` |
|
612 |
- |
|
613 |
- |
|
614 |
- |
|
615 |
- |
|
616 |
-## Shiny application |
|
617 |
-Shiny is an R package that makes it easy to build interactive web apps straight from R. protGear has a built in user-friendly Shiny dashboard to assist in real-time processing and visualization. It provides five sequential steps for handling a data table of fluorescent intensities. |
|
618 |
-The dashboard is launched as below from R or RStudio. To Install R: Open an internet browser and go to www.r-project.org. |
|
619 |
- |
|
620 |
-```{r shiny_launch, eval=FALSE} |
|
621 |
-protGear::launch_protGear_interactive() |
|
622 |
-``` |
|
623 |
- |
|
624 |
-```{r} |
|
625 |
-sessionInfo() |
|
626 |
-``` |
|
627 |
- |
628 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "6E10D617", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/R/read_files_functions.R", |
|
4 |
- "project_path": "R/read_files_functions.R", |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": true, |
|
9 |
- "created": 1647521201664.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 10, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "377,26", |
|
16 |
- "scrollLine": "363" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1647580069, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1647624618732, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,1021 +0,0 @@ |
1 |
-#' Read multiple array files |
|
2 |
-#' @title Read array files |
|
3 |
-#' @param i The name of the file which the data are to be read from. |
|
4 |
-#' @param data_path The path where the file with the data is located |
|
5 |
-#' @param genepix_vars A list of specific definitions of the experiment design. |
|
6 |
-#' See \code{\link{array_vars}}. |
|
7 |
-#' @description This helps to read the chip file(s). |
|
8 |
-#' @return a number of data frames in the global environment |
|
9 |
-#' @importFrom data.table fread %like% |
|
10 |
-#' @importFrom rlang parse_expr := |
|
11 |
-#' @importFrom purrr set_names |
|
12 |
-#' @importFrom magrittr %>% |
|
13 |
-#' @export |
|
14 |
-#' @examples |
|
15 |
-#' ## Not run: |
|
16 |
-#' genepix_vars <- array_vars( |
|
17 |
-#' channel = "635", |
|
18 |
-#' chip_path = system.file("extdata", "array_data/machine1/", package="protGear"), |
|
19 |
-#' totsamples = 21, |
|
20 |
-#' blockspersample = 2, |
|
21 |
-#' mig_prefix = "_first", |
|
22 |
-#' machine = 1, |
|
23 |
-#' date_process = "0520" |
|
24 |
-#' ) |
|
25 |
-#' file_read <- "KK2-06.txt" |
|
26 |
-#' read_array_files(i=file_read, |
|
27 |
-#' data_path=system.file("extdata", "array_data/machine1/", |
|
28 |
-#' package="protGear"), genepix_vars=genepix_vars) |
|
29 |
-#' ## End(Not run) |
|
30 |
-read_array_files <- function(i, data_path, genepix_vars) { |
|
31 |
- ###loop through all the data to read |
|
32 |
- # skip- the number of lines of the data file to skip before beginning to read data --- |
|
33 |
- if (length(grep("Block.*Column|Column.*Block", readLines(file.path(data_path, i))) - |
|
34 |
- 1) == 1) { |
|
35 |
- x <- |
|
36 |
- grep("Block.*Column|Column.*Block", readLines(file.path(data_path, i))) - |
|
37 |
- 1 |
|
38 |
- #print(paste0(x,"_",i)) |
|
39 |
- d_f <- |
|
40 |
- data.table::fread(file.path(data_path, i), |
|
41 |
- skip = x, |
|
42 |
- header = TRUE) |
|
43 |
- ## arrange block to ensure the order is maintains |
|
44 |
- d_f <- d_f %>% arrange(Block) |
|
45 |
- ## calculate the different background methods |
|
46 |
- if (paste0(genepix_vars$BG) %ni% names(d_f)) { |
|
47 |
- genepix_vars$BG <- |
|
48 |
- names(d_f)[names(d_f) %like% paste0(genepix_vars$BG, '$')] |
|
49 |
- } |
|
50 |
- |
|
51 |
- if (paste0(genepix_vars$FG) %ni% names(d_f)) { |
|
52 |
- genepix_vars$FG <- |
|
53 |
- names(d_f)[names(d_f) %like% paste0(genepix_vars$FG, '$')] |
|
54 |
- } |
|
55 |
- |
|
56 |
- expression_med <- |
|
57 |
- paste0("median(`", genepix_vars$BG, "`, na.rm = TRUE)") |
|
58 |
- exp_minpos <- paste0("minpositive(`", genepix_vars$BG, "`)") |
|
59 |
- d_f <- d_f %>% |
|
60 |
- |
|
61 |
- mutate(global_BGMedian := !!parse_expr(expression_med)) %>% |
|
62 |
- ## minimum BG per block and >1 -- Moving minimum approach per block |
|
63 |
- group_by(Block) %>% |
|
64 |
- |
|
65 |
- mutate(minimum_BGMedian := !!parse_expr(exp_minpos)) %>% |
|
66 |
- ungroup() |
|
67 |
- |
|
68 |
- |
|
69 |
- } else |
|
70 |
- stop("File " , i, " does not have Block Column Specification") |
|
71 |
- |
|
72 |
-} |
|
73 |
- |
|
74 |
- |
|
75 |
- |
|
76 |
- |
|
77 |
-#' Extract the background values |
|
78 |
-#' @title extract bg |
|
79 |
-#' @param iden A character indicating the name of the object to be used under data_files. |
|
80 |
-#' @param data_files A list of data objects with names utilised by iden. |
|
81 |
-#' @param genepix_vars A list of specific definitions of the experiment design. See \code{\link{array_vars}}. |
|
82 |
-#' @description A generic function to extract the background data for micro array data. |
|
83 |
-#' @return A data frame of background values |
|
84 |
-#' @importFrom dplyr select arrange |
|
85 |
-#' @importFrom data.table %like% |
|
86 |
-#' @importFrom purrr set_names |
|
87 |
-#' @export |
|
88 |
-#' |
|
89 |
-#' @examples |
|
90 |
-#' ## Not run: |
|
91 |
-#' genepix_vars <- array_vars( |
|
92 |
-#' channel = "635", |
|
93 |
-#' chip_path = system.file("extdata", "array_data/machine1/", package="protGear"), |
|
94 |
-#' totsamples = 21, |
|
95 |
-#' blockspersample = 2, |
|
96 |
-#' mig_prefix = "_first", |
|
97 |
-#' machine = 1, |
|
98 |
-#' ## optional |
|
99 |
-#' date_process = "0520" |
|
100 |
-#' ) |
|
101 |
-#' #Define the data path |
|
102 |
-#' data_path <- paste0(genepix_vars$chip_path) |
|
103 |
-#' # List the file names to use |
|
104 |
-#' filenames <- list.files(genepix_vars$chip_path, |
|
105 |
-#' pattern = '*.txt$|*.gpr$', full.names = FALSE |
|
106 |
-#' ) |
|
107 |
-#' data_files <- purrr::map( |
|
108 |
-#' .x = filenames, |
|
109 |
-#' .f = read_array_files, |
|
110 |
-#' data_path = data_path, |
|
111 |
-#' genepix_vars = genepix_vars |
|
112 |
-#' ) |
|
113 |
-#' data_files <- purrr::set_names(data_files, purrr::map(filenames, name_of_files)) |
|
114 |
-#' names(data_files) |
|
115 |
-#' extract_bg(iden ="KK2-06" , data_files=data_files,genepix_vars=genepix_vars) |
|
116 |
-#' ## End(Not run) |
|
117 |
-extract_bg <- function(iden, data_files , genepix_vars = genepix_vars) |
|
118 |
-{ |
|
119 |
- ## read in the sample ID files |
|
120 |
- ## this can be pulled from a mysql table |
|
121 |
- ## if the sample ID is not available, we create an automated sampleID |
|
122 |
- if (file.exists(file.path(genepix_vars$sampleID_path, paste0(iden , ".csv")))) { |
|
123 |
- arraynames <- |
|
124 |
- read.csv( |
|
125 |
- file.path(genepix_vars$sampleID_path, paste0(iden , ".csv")) , |
|
126 |
- header = TRUE , |
|
127 |
- stringsAsFactors = FALSE , |
|
128 |
- colClasses = "character" |
|
129 |
- ) |
|
130 |
- } else{ |
|
131 |
- warning(iden, |
|
132 |
- " Not found in the sampleID files here", |
|
133 |
- genepix_vars$sampleID_path) |
|
134 |
- arraynames <- data.frame( |
|
135 |
- v1 = (1:genepix_vars$totsamples) , |
|
136 |
- v2 = paste0("SID_gen", 1:genepix_vars$totsamples), |
|
137 |
- barcode = iden |
|
138 |
- ) |
|
139 |
- |
|
140 |
- } |
|
141 |
- |
|
142 |
- |
|
143 |
- ## replicate to the number of blocks |
|
144 |
- ## make sure the block is arranged before merging with the data file |
|
145 |
- arraynames <- arraynames %>% |
|
146 |
- dplyr::select(v1, v2) %>% |
|
147 |
- arrange(as.numeric(v1)) |
|
148 |
- |
|
149 |
- ## capture errors for same sample ID in a slide |
|
150 |
- if (length(unique(arraynames$v2)) < genepix_vars$totsamples) { |
|
151 |
- sink("log_replicates.txt" , append = TRUE) |
|
152 |
- warning("Most likely there is a repeated sample name for " , iden) |
|
153 |
- sink() |
|
154 |
- arraynames <- arraynames %>% |
|
155 |
- group_by(v2) %>% |
|
156 |
- mutate(index = 1:n()) %>% |
|
157 |
- mutate(v2 = ifelse(index > 1, paste0(v2, "_", index), v2)) %>% |
|
158 |
- select(-index) |
|
159 |
- } |
|
160 |
- |
|
161 |
- |
|
162 |
- ## get the data from the loop |
|
163 |
- ## extract the specific data from the data files |
|
164 |
- data <- data_files[[iden]] |
|
165 |
- |
|
166 |
- ## pick the spots per block from the data file |
|
167 |
- spotsperblock <- table(data$Block)[[1]] |
|
168 |
- sampleID <- |
|
169 |
- rep(arraynames$v2, each = spotsperblock * genepix_vars$blockspersample) |
|
170 |
- |
|
171 |
- |
|
172 |
- |
|
173 |
- Data1 <- data %>% |
|
174 |
- # assign respective sample number to each row |
|
175 |
- mutate( |
|
176 |
- sample = rep( |
|
177 |
- seq_len(genepix_vars$totsamples), |
|
178 |
- each = spotsperblock * genepix_vars$blockspersample |
|
179 |
- ), |
|
180 |
- # Bring in the sampleIDs..192 each sample |
|
181 |
- sampleID = sampleID, |
|
182 |
- # Abit of formating the Antigen names and concentration |
|
183 |
- Name = gsub(':', '', Name), |
|
184 |
- Name = gsub('\n', '', Name), |
|
185 |
- Name = gsub(' ', '', Name) |
|
186 |
- ) %>% |
|
187 |
- ##remove uneccessary concs |
|
188 |
- ## filter(!grepl('Landmark|Buffer|IgG', Name)) %>% |
|
189 |
- # group by iden and antigen name |
|
190 |
- group_by(sampleID, Name) |
|
191 |
- |
|
192 |
- if (paste0(genepix_vars$BG) %ni% names(Data1)) { |
|
193 |
- genepix_vars$BG <- |
|
194 |
- names(Data1)[names(Data1) %like% paste0(genepix_vars$BG, '$')] |
|
195 |
- } |
|
196 |
- |
|
197 |
- if (paste0(genepix_vars$FG) %ni% names(Data1)) { |
|
198 |
- genepix_vars$FG <- |
|
199 |
- names(Data1)[names(Data1) %like% paste0(genepix_vars$FG, '$')] |
|
200 |
- } |
|
201 |
- |
|
202 |
- #---------------------------------------------------------------------------------------------------- |
|
203 |
- ##save the MFI values of the Background |
|
204 |
- data1_bg <- |
|
205 |
- Data1 %>% dplyr::select( |
|
206 |
- sampleID, |
|
207 |
- antigen = Name, |
|
208 |
- Block, |
|
209 |
- FBG_Median = !!genepix_vars$FG , |
|
210 |
- BG_Median = !!genepix_vars$BG |
|
211 |
- ) %>% |
|
212 |
- mutate(replicate = 1:n()) %>% |
|
213 |
- filter( |
|
214 |
- !grepl( |
|
215 |
- '^[Ll][Aa][Nn][Dd][Mm][Aa][Rr][Kk]|^[bB][Uu][Ff][Ff][Ee][Rr]', |
|
216 |
- antigen |
|
217 |
- ) |
|
218 |
- ) |
|
219 |
- # combine Name and replicate |
|
220 |
- #---------------------------------------------------------------------------------------------------- |
|
221 |
- return(data1_bg) |
|
222 |
-} |
|
223 |
- |
|
224 |
- |
|
225 |
- |
|
226 |
- |
|
227 |
- |
|
228 |
-#' @title Plot background |
|
229 |
-#' |
|
230 |
-#' @param df A default dataset to use for plot. |
|
231 |
-#' @param bg_MFI A numeric \code{variable} describing which is the background MFI |
|
232 |
-#' @param x_axis The variable on the x axis |
|
233 |
-#' @param log_mfi a logical value indicating whether the MFI values should be |
|
234 |
-#' log transformed or not. |
|
235 |
-#' |
|
236 |
-#' @description A generic function for plotting of R objects. |
|
237 |
-#' @return A ggplot of background values |
|
238 |
-#' @export |
|
239 |
-#' @import ggpubr |
|
240 |
-#' @examples |
|
241 |
-#' ## Not run: |
|
242 |
-#' #After extracting the background using \code{\link{extract_bg}} we plot the data using |
|
243 |
-#' allData_bg <- readr::read_csv(system.file("extdata", "bg_example.csv", package="protGear")) |
|
244 |
-#' plot_bg(allData_bg, |
|
245 |
-#' x_axis = "antigen", |
|
246 |
-#' bg_MFI = "BG_Median", log_mfi = TRUE |
|
247 |
-#' ) |
|
248 |
-#' ## End(Not run) |
|
249 |
- |
|
250 |
-plot_bg <- function(df, |
|
251 |
- x_axis = "antigen", |
|
252 |
- bg_MFI = "BG_Median", |
|
253 |
- log_mfi = TRUE) { |
|
254 |
- ## create an id to help in having a numeric sample ID to sort your data |
|
255 |
- ## this is because all sampleIDs from the samples were not unique |
|
256 |
- ## rename the original sampleID sampleID2 |
|
257 |
- bg_MFI_sys <- rlang::sym(bg_MFI) |
|
258 |
- ### check if the .id exists and renane it to slide |
|
259 |
- if ('.id' %in% names(df)) { |
|
260 |
- df <- df %>% dplyr::rename(slide = .id) |
|
261 |
- } else |
|
262 |
- df <- df %>% dplyr::mutate(slide = "slide") |
|
263 |
- |
|
264 |
- bg_plot <- df %>% |
|
265 |
- dplyr::mutate(log_bg = log2(!!bg_MFI_sys)) |
|
266 |
- |
|
267 |
- # bg_plot$sampleID <- group_indices(.data =bg_plot ) |
|
268 |
- |
|
269 |
- if (log_mfi == TRUE) { |
|
270 |
- p_pubr <- ggboxplot( |
|
271 |
- data = bg_plot , |
|
272 |
- x = x_axis , |
|
273 |
- y = "log_bg", |
|
274 |
- facet.by = "replicate", |
|
275 |
- ncol = 1 |
|
276 |
- ) |
|
277 |
- p_pubr <- ggpar( |
|
278 |
- p_pubr, |
|
279 |
- font.tickslab = c(6, "#993333"), |
|
280 |
- xtickslab.rt = 45 , |
|
281 |
- ylab = "Background of the replicates (log2)" |
|
282 |
- ) |
|
283 |
- p_bg <- |
|
284 |
- ggplot(data = bg_plot , aes_string(x = x_axis , y = "log_bg")) + |
|
285 |
- geom_boxplot() + |
|
286 |
- facet_wrap( ~ replicate, ncol = 1) + |
|
287 |
- theme_light() + |
|
288 |
- theme(axis.text.x = element_text(angle = 45 , hjust = 1)) |
|
289 |
- |
|
290 |
- |
|
291 |
- |
|
292 |
- } else if (log_mfi == FALSE) { |
|
293 |
- p_pubr <- ggboxplot( |
|
294 |
- data = bg_plot , |
|
295 |
- x = x_axis , |
|
296 |
- y = bg_MFI, |
|
297 |
- facet.by = "replicate", |
|
298 |
- ncol = 1 |
|
299 |
- ) |
|
300 |
- p_pubr <- ggpar( |
|
301 |
- p_pubr, |
|
302 |
- font.tickslab = c(6, "#993333"), |
|
303 |
- xtickslab.rt = 45 , |
|
304 |
- ylab = "Background of the replicates raw" |
|
305 |
- ) |
|
306 |
- |
|
307 |
- p_bg <- |
|
308 |
- ggplot(data = bg_plot , aes_string(x = x_axis , y = bg_MFI)) + |
|
309 |
- geom_boxplot() + |
|
310 |
- facet_wrap( ~ replicate, ncol = 1) + |
|
311 |
- theme_light() + |
|
312 |
- theme(axis.text.x = element_text(angle = 45 , hjust = 1)) |
|
313 |
- } else { |
|
314 |
- # p_pubr <- ggboxplot(data = bg_plot , |
|
315 |
- # x="antigen" , y=bg_MFI, |
|
316 |
- # facet.by = "replicate",ncol=1) |
|
317 |
- # p_pubr <- ggpar(p_pubr,font.tickslab = c(8,"#993333"), |
|
318 |
- # xtickslab.rt = 45 , ylab = "Background of the replicates raw") |
|
319 |
- |
|
320 |
- p_bg <- |
|
321 |
- ggplot(data = bg_plot , aes_string(x = x_axis , y = bg_MFI)) + |
|
322 |
- geom_boxplot() + |
|
323 |
- facet_wrap( ~ replicate, ncol = 1) + |
|
324 |
- theme_light() + |
|
325 |
- theme(axis.text.x = element_text(angle = 45 , hjust = 1)) |
|
326 |
- } |
|
327 |
- return(p_pubr) |
|
328 |
-} |
|
329 |
- |
|
330 |
-#___________________________________________________ |
|
331 |
- |
|
332 |
- |
|
333 |
-#' Plot foreground and background values |
|
334 |
-#' @title plot_FB |
|
335 |
-#' @param df An object containing the data to which the plot is done. |
|
336 |
-#' @param antigen_name The \code{variable} describing which features/proteins/ |
|
337 |
-#' antibodies in the data should be used to plot |
|
338 |
-#' @param bg_MFI A numeric \code{variable} describing which is the background MFI |
|
339 |
-#' @param FG_MFI A numeric \code{variable} describing which is the foreground MFI |
|
340 |
-#' @param log_mfi a logical value indicating whether the MFI values should be |
|
341 |
-#' log transformed or not. |
|
342 |
-#' @description A generic function for plotting the background and foreground values. |
|
343 |
-#' @return a ggplot of foreground vs background MFI values |
|
344 |
-#' @export |
|
345 |
-#' @import ggplot2 dplyr |
|
346 |
-#' @examples |
|
347 |
-#' ## Not run: |
|
348 |
-#' #After extracting the background using \code{\link{extract_bg}} we plot the data using |
|
349 |
-#' allData_bg <- readr::read_csv(system.file("extdata", "bg_example.csv", package="protGear")) |
|
350 |
-#' plot_FB(allData_bg, |
|
351 |
-#' antigen_name = "antigen", |
|
352 |
-#' bg_MFI = "BG_Median", FG_MFI = "FBG_Median", log = FALSE |
|
353 |
-#' ) |
|
354 |
-#' ## End(Not run) |
|
355 |
-plot_FB <- |
|
356 |
- function(df, |
|
357 |
- antigen_name = "antigen", |
|
358 |
- bg_MFI = "BG_Median", |
|
359 |
- FG_MFI = "FBG_Median", |
|
360 |
- log_mfi = FALSE) { |
|
361 |
- ## create an id to help in having a numeric sample ID to sort your data |
|
362 |
- ## this is because all sampleIDs from the samples were not unique |
|
363 |
- ## rename the original sampleID sampleID2 |
|
364 |
- bg_MFI_sys <- rlang::sym(bg_MFI) |
|
365 |
- FB_MFI_sys <- rlang::sym(FG_MFI) |
|
366 |
- ### check if the .id exists and renane it to slide |
|
367 |
- if ('.id' %in% names(df)) { |
|
368 |
- df <- df %>% dplyr::rename(slide = .id) |
|
369 |
- } else |
|
370 |
- df <- df %>% dplyr::mutate(slide = "slide") |
|
371 |
- |
|
372 |
- bg_plot <- df %>% |
|
373 |
- mutate(log_bg = log2(!!bg_MFI_sys), |
|
374 |
- log_fb = log2(!!FB_MFI_sys)) |
|
375 |
- |
|
376 |
- |
|
377 |
- |
|
378 |
- if (log_mfi == TRUE) { |
|
379 |
- p <- ggplot(bg_plot , aes( |
|
380 |
- log_fb, |
|
381 |
- log_bg, |
|
382 |
- text = paste( |
|
383 |
- "Antigen: ", |
|
384 |
- antigen, |
|
385 |
- "<br>FG: $", |
|
386 |
- FBG_Median, |
|
387 |
- "<br>B: $", |
|
388 |
- BG_Median |
|
389 |
- ) |
|
390 |
- )) + |
|
391 |
- xlab("Foreground MFI") + ylab("Background MFI") + |
|
392 |
- geom_jitter() + |
|
393 |
- theme_light() |
|
394 |
- |
|
395 |
- |
|
396 |
- |
|
397 |
- } else if (log_mfi == FALSE) { |
|
398 |
- p <- ggplot(bg_plot , aes( |
|
399 |
- FBG_Median, |
|
400 |
- BG_Median, |
|
401 |
- text = paste( |
|
402 |
- "Antigen: ", |
|
403 |
- antigen, |
|
404 |
- "<br>FG: $", |
|
405 |
- FBG_Median, |
|
406 |
- "<br>B: $", |
|
407 |
- BG_Median |
|
408 |
- ) |
|
409 |
- )) + |
|
410 |
- xlab("Foreground MFI") + ylab("Background MFI") + |
|
411 |
- geom_jitter() + |
|
412 |
- theme_light() |
|
413 |
- } else { |
|
414 |
- p <- ggplot(bg_plot , aes( |
|
415 |
- FBG_Median, |
|
416 |
- BG_Median, |
|
417 |
- text = paste( |
|
418 |
- "Antigen: ", |
|
419 |
- antigen, |
|
420 |
- "<br>FG: $", |
|
421 |
- FBG_Median, |
|
422 |
- "<br>B: $", |
|
423 |
- BG_Median |
|
424 |
- ) |
|
425 |
- )) + |
|
426 |
- xlab("Foreground MFI") + ylab("Background MFI") + |
|
427 |
- geom_jitter() + |
|
428 |
- theme_light() |
|
429 |
- } |
|
430 |
- return(p) |
|
431 |
- } |
|
432 |
- |
|
433 |
- |
|
434 |
-#' Background correction |
|
435 |
-#' @title bg_correct |
|
436 |
-#' @param iden A character indicating the name of the object to be used under Data1 |
|
437 |
-#' @param Data1 A data frame with sample identifiers merged with micro array data. |
|
438 |
-#' @param genepix_vars A list of specific definitions of the experiment design. See \code{\link{array_vars}}. |
|
439 |
-#' @param method a description of the background correction to be used. Possible values are \code{"none","subtract_local", |
|
440 |
-#' "subtract_global","movingmin_bg","minimum_half","edwards" or "normexp"}. The default is \code{"subtract_local"}. |
|
441 |
-#' @details The function implements background correction methods developed by \code{\link[limma]{backgroundCorrect}}. But the |
|
442 |
-#' \code{minimum_half and movingmin_bg} uses the block of the protein array as the grid. If method="movingmin_bg" the minimum |
|
443 |
-#' background value within a block is subtracted. |
|
444 |
-#' If method="minimum_half" then any intensity which is negative after background subtraction is reset to be equal to half the minimum positive value in |
|
445 |
-#' a block. If method="movingmin_value" then any intensity which is negative after background subtraction is reset to the minimum positive value |
|
446 |
-#' in a block. For \code{edwards} we implement a similar algorithm with \code{limma::backgroundCorrect(method="edwards")} and for \code{'normexp'} |
|
447 |
-#' we use the saddle-point approximation to maximum likelihood, \code{\link[limma]{backgroundCorrect}} for more details. |
|
448 |
-#' @description A generic function to perform background correction. |
|
449 |
-#' @return A data frame with background corrected data |
|
450 |
-#' @export |
|
451 |
-#' @import dplyr limma |
|
452 |
-#' @importFrom rlang sym |
|
453 |
-#' @examples |
|
454 |
-#' ## Not run: |
|
455 |
-#' genepix_vars <- array_vars( |
|
456 |
-#' channel = "635", |
|
457 |
-#' chip_path = system.file("extdata", "array_data/machine1/", package="protGear"), |
|
458 |
-#' totsamples = 21, |
|
459 |
-#' blockspersample = 2, |
|
460 |
-#' mig_prefix = "_first", |
|
461 |
-#' machine = 1, |
|
462 |
-#' ## optional |
|
463 |
-#' date_process = "0520" |
|
464 |
-#' ) |
|
465 |
-#' raw_df <- readr::read_csv(system.file("extdata", "Data1_bg_sample.csv", package="protGear")) |
|
466 |
-#' bg_correct(iden="iden", |
|
467 |
-#' Data1 = raw_df, |
|
468 |
-#' genepix_vars = genepix_vars, method="subtract_local" |
|
469 |
-#' ) |
|
470 |
-#' ## End(Not run) |
|
471 |
-bg_correct <- |
|
472 |
- function(iden, Data1, genepix_vars, method = "subtract_local") { |
|
473 |
- #---------------------------------------------------------------------------------------------------- |
|
474 |
- if (paste0(genepix_vars$BG) %ni% names(Data1)) { |
|
475 |
- genepix_vars$BG <- |
|
476 |
- names(Data1)[names(Data1) %like% paste0(genepix_vars$BG, '$')] |
|
477 |
- genepix_vars$BG <- rlang::sym(genepix_vars$BG) |
|
478 |
- } |
|
479 |
- |
|
480 |
- if (paste0(genepix_vars$FG) %ni% names(Data1)) { |
|
481 |
- genepix_vars$FG <- |
|
482 |
- names(Data1)[names(Data1) %like% paste0(genepix_vars$FG, '$')] |
|
483 |
- genepix_vars$FG <- rlang::sym(genepix_vars$FG) |
|
484 |
- } |
|
485 |
- |
|
486 |
- |
|
487 |
- ##save the MFI values without formatting the background |
|
488 |
- data1_full_bg <- Data1 %>% |
|
489 |
- dplyr::select(sampleID, |
|
490 |
- antigen = Name, |
|
491 |
- FMedian = !!genepix_vars$FG) %>% |
|
492 |
- mutate(replicate = 1:n()) %>% |
|
493 |
- ## removing Land mark and Buffer |
|
494 |
- filter( |
|
495 |
- !grepl( |
|
496 |
- '^[Ll][Aa][Nn][Dd][Mm][Aa][Rr][Kk]|^[bB][Uu][Ff][Ff][Ee][Rr]', |
|
497 |
- antigen |
|
498 |
- ) |
|
499 |
- ) |
|
500 |
- # %>% spread(antigen, F635Median) |
|
501 |
- # combine Name and replicate |
|
502 |
- ## save in the background data folder files with higher background values |
|
503 |
- file_ident <- paste0(iden, "_raw_MFI_BG", ".csv") |
|
504 |
- |
|
505 |
- #create_dir(path = system.file("processe_data/raw_MFI_BG/")) |
|
506 |
- #write_csv(data1_full_bg ,system.file("processed_data/raw_MFI_BG/", 'file_ident', package="protGear")) |
|
507 |
- #---------------------------------------------------------------------------------------------------- |
|
508 |
- if (method == "none" | method == "") { |
|
509 |
- #---------------------------------------------------------------------------------------------------- |
|
510 |
- ##MFI values without subtracting the background |
|
511 |
- Data1 <- Data1 %>% |
|
512 |
- dplyr::select( |
|
513 |
- sampleID, |
|
514 |
- sample_array_ID, |
|
515 |
- antigen = Name, |
|
516 |
- FMedian = !!genepix_vars$FG , |
|
517 |
- BGMedian = !!genepix_vars$BG, |
|
518 |
- Block, |
|
519 |
- Column, |
|
520 |
- Row |
|
521 |
- ) %>% |
|
522 |
- mutate(FMedianBG_correct = FMedian) %>% |
|
523 |
- mutate(replicate = 1:n()) #%>% |
|
524 |
- #filter(!grepl('^[Ll][Aa][Nn][Dd][Mm][Aa][Rr][Kk]|^[bB][Uu][Ff][Ff][Ee][Rr]', antigen)) |
|
525 |
- #---------------------------------------------------------------------------------------------------- |
|
526 |
- |
|
527 |
- } else if (method == "subtract_local") { |
|
528 |
- ## this approach subtracts the local background estimated by the Array Jet Machine |
|
529 |
- #---------------------------------------------------------------------------------------------------- |
|
530 |
- ##save the MFI values with subtracting the background |
|
531 |
- Data1 <- Data1 %>% |
|
532 |
- dplyr::mutate(FMedianBG_correct = (!!genepix_vars$FG) - (!!genepix_vars$BG)) %>% |
|
533 |
- dplyr::select( |
|
534 |
- sampleID, |
|
535 |
- sample_array_ID, |
|
536 |
- antigen = Name, |
|
537 |
- FMedian = !!genepix_vars$FG, |
|
538 |
- BGMedian = !!genepix_vars$BG, |
|
539 |
- FMedianBG_correct, |
|
540 |
- Block, |
|
541 |
- Column, |
|
542 |
- Row |
|
543 |
- ) %>% |
|
544 |
- dplyr::mutate(replicate = 1:n()) |
|
545 |
- #%>% |
|
546 |
- # filter(!grepl('^[Ll][Aa][Nn][Dd][Mm][Aa][Rr][Kk]|^[bB][Uu][Ff][Ff][Ee][Rr]', antigen)) |
|
547 |
- #---------------------------------------------------------------------------------------------------- |
|
548 |
- |
|
549 |
- } else if (method == "subtract_global") { |
|
550 |
- ## this approach subracts the median of the backgrounds in a slide |
|
551 |
- #---------------------------------------------------------------------------------------------------- |
|
552 |
- ##save the MFI values with subtracting the background |
|
553 |
- Data1 <- Data1 %>% |
|
554 |
- mutate(FMedianBG_correct = !!genepix_vars$FG - global_BGMedian) %>% |
|
555 |
- dplyr::select( |
|
556 |
- sampleID, |
|
557 |
- sample_array_ID, |
|
558 |
- antigen = Name, |
|
559 |
- FMedian = !!genepix_vars$FG, |
|
560 |
- BGMedian = !!genepix_vars$BG, |
|
561 |
- FMedianBG_correct, |
|
562 |
- Block, |
|
563 |
- Column, |
|
564 |
- Row |
|
565 |
- ) %>% |
|
566 |
- mutate(replicate = 1:n()) |
|
567 |
- #%>% |
|
568 |
- # filter(!grepl('^[Ll][Aa][Nn][Dd][Mm][Aa][Rr][Kk]|^[bB][Uu][Ff][Ff][Ee][Rr]', antigen)) |
|
569 |
- #---------------------------------------------------------------------------------------------------- |
|
570 |
- } else if (method == "movingmin_bg") { |
|
571 |
- ## this is subtracted |
|
572 |
- Data1 <- Data1 %>% |
|
573 |
- mutate(FMedianBG_correct = !!genepix_vars$FG-!!genepix_vars$BG) %>% |
|
574 |
- ## this is generated while reading the array files using read_array_files function |
|
575 |
- mutate(FMedianBG_correct = !!genepix_vars$FG - minimum_BGMedian) %>% |
|
576 |
- dplyr::select( |
|
577 |
- sampleID, |
|
578 |
- sample_array_ID, |
|
579 |
- antigen = Name, |
|
580 |
- FMedian = !!genepix_vars$FG, |
|
581 |
- BGMedian = !!genepix_vars$BG, |
|
582 |
- FMedianBG_correct, |
|
583 |
- Block, |
|
584 |
- Column, |
|
585 |
- Row |
|
586 |
- ) %>% |
|
587 |
- mutate(replicate = 1:n()) |
|
588 |
- # %>% |
|
589 |
- # filter(!grepl('^[Ll][Aa][Nn][Dd][Mm][Aa][Rr][Kk]|^[bB][Uu][Ff][Ff][Ee][Rr]', antigen)) |
|
590 |
- |
|
591 |
- } else if (method == "minimum_half") { |
|
592 |
- ## this approach ensures all the MFI values are positive |
|
593 |
- ## if the MFI <0 after subtraction the MFI is set to the half of the minimum corrected intensities |
|
594 |
- #---------------------------------------------------------------------------------------------------- |
|
595 |
- ##save the MFI values with subtracting the background |
|
596 |
- Data1 <- Data1 %>% |
|
597 |
- mutate(FMedianBG_correct = !!genepix_vars$FG-!!genepix_vars$BG) %>% |
|
598 |
- dplyr::select( |
|
599 |
- sampleID, |
|
600 |
- sample_array_ID, |
|
601 |
- antigen = Name, |
|
602 |
- FMedian = !!genepix_vars$FG, |
|
603 |
- BGMedian = !!genepix_vars$BG, |
|
604 |
- FMedianBG_correct, |
|
605 |
- Block, |
|
606 |
- Column, |
|
607 |
- Row |
|
608 |
- ) %>% |
|
609 |
- group_by(Block) %>% |
|
610 |
- mutate(FMedianBG_correct = ifelse( |
|
611 |
- FMedianBG_correct < 0.1, |
|
612 |
- (minpositive(FMedianBG_correct) / |
|
613 |
- 2), |
|
614 |
- FMedianBG_correct |
|
615 |
- )) %>% |
|
616 |
- group_by(sampleID, antigen) %>% |
|
617 |
- mutate(replicate = 1:n()) |
|
618 |
- #---------------------------------------------------------------------------------------------------- |
|
619 |
- } else if (method == "minimum_value") { |
|
620 |
- ## this approach ensures all the MFI values are positive |
|
621 |
- ## if the MFI <0 after subtraction the MFI is set to the minimum of the corrected intensities |
|
622 |
- #---------------------------------------------------------------------------------------------------- |
|
623 |
- Data1 <- Data1 %>% |
|
624 |
- mutate(FMedianBG_correct = !!genepix_vars$FG-!!genepix_vars$BG) %>% |
|
625 |
- dplyr::select( |
|
626 |
- sampleID, |
|
627 |
- sample_array_ID, |
|
628 |
- antigen = Name, |
|
629 |
- FMedian = !!genepix_vars$FG, |
|
630 |
- BGMedian = !!genepix_vars$BG, |
|
631 |
- FMedianBG_correct, |
|
632 |
- Block, |
|
633 |
- Column, |
|
634 |
- Row |
|
635 |
- ) %>% |
|
636 |
- group_by(Block) %>% |
|
637 |
- mutate(FMedianBG_correct = ifelse(FMedianBG_correct < 0.1, |
|
638 |
- (minpositive( |
|
639 |
- FMedianBG_correct |
|
640 |
- )), FMedianBG_correct)) %>% |
|
641 |
- group_by(sampleID, antigen) %>% |
|
642 |
- mutate(replicate = 1:n()) |
|
643 |
- |
|
644 |
- #---------------------------------------------------------------------------------------------------- |
|
645 |
- } else if (method == "edwards") { |
|
646 |
- #a log-linear interpolation method is used to adjust lower intensities as in Edwards (2003). |
|
647 |
- Data1 <- Data1 %>% |
|
648 |
- mutate(FMedianBG_correct = !!genepix_vars$FG-!!genepix_vars$BG) |
|
649 |
- one <- matrix(1, nrow(Data1), 1) |
|
650 |
- delta.vec <- function(d, f = 0.1) { |
|
651 |
- ## mean(d < 1e-16, na.rm = TRUE) % of values that are negative |
|
652 |
- ## mean(d < 1e-16, na.rm = TRUE) * (1 + f) the % of values just above the negative values |
|
653 |
- ## gives the quartile cut off value of the threshold |
|
654 |
- quantile(d, |
|
655 |
- probs = mean(d < 1e-16, na.rm = TRUE) * (1 + f), |
|
656 |
- na.rm = TRUE) |
|
657 |
- } |
|
658 |
- #delta <- one %*% apply(as.matrix(Data1[['FMedianBG_correct']]), 2, delta.vec) |
|
659 |
- ## no need to multiply with 1 since its returning the same value and we want to implement in a data frame |
|
660 |
- delta <- |
|
661 |
- apply(as.matrix(Data1[['FMedianBG_correct']]), 2, delta.vec) |
|
662 |
- |
|
663 |
- ## each value its given its own value accordingly |
|
664 |
- ## this helps maintain the variation |
|
665 |
- Data1 <- Data1 %>% |
|
666 |
- dplyr::select( |
|
667 |
- sampleID, |
|
668 |
- sample_array_ID, |
|
669 |
- antigen = Name, |
|
670 |
- FMedian = !!genepix_vars$FG, |
|
671 |
- FMedianBG_correct, |
|
672 |
- BGMedian = !!genepix_vars$BG, |
|
673 |
- Block, |
|
674 |
- Column, |
|
675 |
- Row |
|
676 |
- ) %>% |
|
677 |
- group_by(Block) %>% |
|
678 |
- mutate(FMedianBG_correct = ifelse(FMedianBG_correct < delta, |
|
679 |
- (delta * exp( |
|
680 |
- 1 - (BGMedian + delta) / FMedian |
|
681 |
- )), FMedianBG_correct)) %>% |
|
682 |
- group_by(sampleID, antigen) %>% |
|
683 |
- dplyr::mutate(replicate = 1:n()) |
|
684 |
- } else if (method == "normexp") { |
|
685 |
- ##a convolution of normal and exponential distributions is fitted to the foreground intensities using |
|
686 |
- #the background intensities as a covariate, and the expected signal given the observed foreground becomes |
|
687 |
- #the corrected intensity. This results in a smooth monotonic transformation of the background subtracted |
|
688 |
- #intensities such that all the corrected intensities are positive. |
|
689 |
- ##Both norm exp are implemented in Limma for DNA micro array data |
|
690 |
- Data1 <- Data1 %>% |
|
691 |
- mutate(FMedianBG_correct = !!genepix_vars$FG-!!genepix_vars$BG) |
|
692 |
- E <- as.matrix(Data1[['FMedianBG_correct']]) |
|
693 |
- rownames(E) <- rownames(Data1) |
|
694 |
- |
|
695 |
- ## here we use Here "saddle" gives the saddle-point approximation to maximum likelihood from |
|
696 |
- # Ritchie et al (2007) and improved by Silver et al (2009) --> check limma for details |
|
697 |
- ## can we use offset--> updates |
|
698 |
- bg_correct <- |
|
699 |
- limma::backgroundCorrect.matrix( |
|
700 |
- E = E, |
|
701 |
- method = "auto", |
|
702 |
- offset = 0, |
|
703 |
- printer = NULL, |
|
704 |
- normexp.method = "saddle", |
|
705 |
- verbose = TRUE |
|
706 |
- ) |
|
707 |
- bg_correct <- data.frame(FMedianBG_correct = bg_correct) |
|
708 |
- |
|
709 |
- ## select the important variables and join with the background corrected data |
|
710 |
- Data1 <- Data1 %>% |
|
711 |
- dplyr::select( |
|
712 |
- sampleID, |
|
713 |
- sample_array_ID, |
|
714 |
- antigen = Name, |
|
715 |
- FMedian = !!genepix_vars$FG, |
|
716 |
- BGMedian = !!genepix_vars$BG, |
|
717 |
- Block, |
|
718 |
- Column, |
|
719 |
- Row |
|
720 |
- ) %>% |
|
721 |
- bind_cols(bg_correct) %>% |
|
722 |
- group_by(sampleID, antigen) %>% |
|
723 |
- dplyr::mutate(replicate = 1:n()) |
|
724 |
- } |
|
725 |
- #Data1 <- Data1 %>% rename(F635MedianB635=F635.Median...B635) |
|
726 |
- |
|
727 |
- return(Data1) |
|
728 |
- } |
|
729 |
- |
|
730 |
- |
|
731 |
- |
|
732 |
- |
|
733 |
-#' Merge sample ID with the array data |
|
734 |
-#' |
|
735 |
-#' @param iden A character indicating the name of the object to be used under data_files. |
|
736 |
-#' @param data_files A list of data objects with names utilised by iden. |
|
737 |
-#' @param genepix_vars A list of specific definitions of the experiment design. See \code{\link{array_vars}}. |
|
738 |
-#' @param method A description of the background correction to be used. See \code{\link{bg_correct}}. |
|
739 |
-#' @description A generic function that merges the protein data with the sample identifiers or sample names. If the file |
|
740 |
-#' does not have sample identifiers the function generates it automatically. |
|
741 |
-#' @return a data frame merged with corresponding sample ID's. The sample ID are specified in the sample ID files |
|
742 |
-#' @export |
|
743 |
-#' @import dplyr |
|
744 |
-#' @importFrom purrr set_names |
|
745 |
-#' @examples |
|
746 |
-#' ## Not run: |
|
747 |
-#' ### Define the genepix_vars |
|
748 |
-#' genepix_vars <- array_vars( |
|
749 |
-#' channel = "635", |
|
750 |
-#' chip_path = system.file("extdata", "array_data/machine1/", package="protGear"), |
|
751 |
-#' totsamples = 21, |
|
752 |
-#' blockspersample = 2, |
|
753 |
-#' mig_prefix = "_first", |
|
754 |
-#' machine = 1, |
|
755 |
-#' ## optional |
|
756 |
-#' date_process = "0520" |
|
757 |
-#' ) |
|
758 |
-#' |
|
759 |
-#' ## the path where the micro-array data is located |
|
760 |
-#' data_path <- paste0(genepix_vars$chip_path) |
|
761 |
-#' filenames <- list.files(genepix_vars$chip_path, |
|
762 |
-#' pattern = "*.txt$|*.gpr$", full.names = FALSE |
|
763 |
-#' ) |
|
764 |
-#' ## create a list of all the files |
|
765 |
-#' data_files <- purrr::map( |
|
766 |
-#' .x = filenames, |
|
767 |
-#' .f = read_array_files, |
|
768 |
-#' data_path = data_path, |
|
769 |
-#' genepix_vars = genepix_vars |
|
770 |
-#' ) |
|
771 |
-#' data_files <- purrr::set_names(data_files, purrr::map(filenames, name_of_files)) |
|
772 |
-#' ## merge the lab data with samples and perform bg correction |
|
773 |
-#' merge_sampleID(iden = "KK2-06", data_files = data_files, |
|
774 |
-#' genepix_vars =genepix_vars,method = "subtract_global" ) |
|
775 |
-#' ## End(Not run) |
|
776 |
-merge_sampleID <- function(iden, data_files, genepix_vars, method) |
|
777 |
-{ |
|
778 |
- ## read in the sample ID files |
|
779 |
- ## this can be pulled from a mysql table |
|
780 |
- if (file.exists(file.path(genepix_vars$sampleID_path, paste0(iden , ".csv")))) { |
|
781 |
- arraynames <- |
|
782 |
- read.csv( |
|
783 |
- file.path(genepix_vars$sampleID_path, paste0(iden , ".csv")) , |
|
784 |
- header = TRUE , |
|
785 |
- stringsAsFactors = FALSE , |
|
786 |
- colClasses = "character" |
|
787 |
- ) |
|
788 |
- } else{ |
|
789 |
- warning(iden, |
|
790 |
- ".csv Not found in the sampleID files", |
|
791 |
- genepix_vars$sampleID_path) |
|
792 |
- arraynames <- |
|
793 |
- data.frame( |
|
794 |
- v1 = (1:genepix_vars$totsamples) , |
|
795 |
- v2 = paste0("SID_gen", 1:genepix_vars$totsamples), |
|
796 |
- barcode = iden |
|
797 |
- ) |
|
798 |
- } |
|
799 |
- |
|
800 |
- ## replicate to the number of blocks |
|
801 |
- ## make sure the block is arranged before merging with the data file |
|
802 |
- arraynames <- arraynames %>% |
|
803 |
- dplyr::select(v1, v2) %>% |
|
804 |
- arrange(as.numeric(v1)) |
|
805 |
- |
|
806 |
- ## capture errors for same sample ID in a slide |
|
807 |
- if (length(unique(arraynames$v2)) < genepix_vars$totsamples) { |
|
808 |
- sink("log_replicates.txt" , append = TRUE) |
|
809 |
- warning("Most likely there is a repeated sample name for " , iden) |
|
810 |
- sink() |
|
811 |
- arraynames <- arraynames %>% |
|
812 |
- group_by(v2) %>% |
|
813 |
- mutate(index = 1:n()) %>% |
|
814 |
- mutate(v2 = ifelse(index > 1, paste0(v2, "_", index), v2)) %>% |
|
815 |
- select(-index) |
|
816 |
- } |
|
817 |
- |
|
818 |
- |
|
819 |
- ## get the data from the loop |
|
820 |
- ## extract the specific data from the data files |
|
821 |
- data <- data_files[[iden]] |
|
822 |
- |
|
823 |
- ## pick the spots per block from the data file |
|
824 |
- spotsperblock <- table(data$Block)[[1]] |
|
825 |
- sampleID <- |
|
826 |
- rep(arraynames$v2, each = spotsperblock * genepix_vars$blockspersample) |
|
827 |
- sample_array_ID <- |
|
828 |
- rep(arraynames$v1, each = spotsperblock * genepix_vars$blockspersample) |
|
829 |
- |
|
830 |
- |
|
831 |
- Data1 <- data %>% |
|
832 |
- # assign respective sample number to each row |
|
833 |
- mutate( |
|
834 |
- sample = rep( |
|
835 |
- 1:genepix_vars$totsamples, |
|
836 |
- each = spotsperblock * genepix_vars$blockspersample |
|
837 |
- ), |
|
838 |
- # Bring in the sampleIDs..192 each sample |
|
839 |
- sampleID = sampleID, |
|
840 |
- sample_array_ID = sample_array_ID, |
|
841 |
- # Abit of formating the Antigen names and concentration |
|
842 |
- Name = gsub(':', '', Name), |
|
843 |
- Name = gsub('\n', '', Name), |
|
844 |
- Name = gsub(' ', '', Name) |
|
845 |
- ) %>% |
|
846 |
- ##remove uneccessary concs |
|
847 |
- ## filter(!grepl('Landmark|Buffer|IgG', Name)) %>% |
|
848 |
- # group by iden and antigen name |
|
849 |
- group_by(sampleID, Name) |
|
850 |
- |
|
851 |
- ## DO the background correction |
|
852 |
- ## specify the |
|
853 |
- Data1 <- bg_correct(iden, Data1 , genepix_vars, method = method) |
|
854 |
- Data1 <- Data1 %>% mutate(iden = iden) |
|
855 |
- return(Data1) |
|
856 |
-} |
|
857 |
- |
|
858 |
- |
|
859 |
- |
|
860 |
-#' Read a gpr file to visualize |
|
861 |
-#' |
|
862 |
-#' @param infile a .gpr file to be used to visualize the expression intensities of the slide spots |
|
863 |
-#' |
|
864 |
-#' @return a data frame to visualize the background or foreground values |
|
865 |
-#' @export |
|
866 |
-#' @importFrom data.table fread |
|
867 |
-#' @examples |
|
868 |
-#' ## Not run: |
|
869 |
-#' read_array_visualize(infile = system.file("extdata", |
|
870 |
-#' "/array_data/machine1/KK2-06.txt", package="protGear")) |
|
871 |
-#' ## End(Not run) |
|
872 |
-read_array_visualize <- function(infile) { |
|
873 |
- x <- grep('Block.*Column|Column.*Block', readLines(infile)) |
|
874 |
- # d_f <- read.table(inFile$datapath,skip=x-1, header = T) |
|
875 |
- d_f <- data.table::fread(infile, skip = x - 1, header = TRUE) |
|
876 |
- return(d_f) |
|
877 |
-} |
|
878 |
- |
|
879 |
-#' Visualize the slide mimicking the original scan image. |
|
880 |
-#' |
|
881 |
-#' @param infile a .gpr file to be used to visualize the expression intensities of the slide spots |
|
882 |
-#' @param MFI_var the MFI variable to plot, can be either the background or foreground value |
|
883 |
-#' @param d_f a data frame with array data |
|
884 |
-#' @param interactive a logical to specify whether an interactive graph is returned or not |
|
885 |
-#' |
|
886 |
-#' @import htmltools ggplot2 |
|
887 |
-#' @importFrom plotly ggplotly |
|
888 |
-#' @return A ggplot of slide foreground values |
|
889 |
-#' @export |
|
890 |
-#' |
|
891 |
-#' @examples |
|
892 |
-#' ## Not run: |
|
893 |
-#' visualize_slide( |
|
894 |
-#' infile = system.file("extdata", "/array_data/machine1/KK2-06.txt", package="protGear"), |
|
895 |
-#' MFI_var = "B635 Median" |
|
896 |
-#' ) |
|
897 |
-#' ## End(Not run) |
|
898 |
-visualize_slide <- |
|
899 |
- function(infile, |
|
900 |
- MFI_var, |
|
901 |
- interactive = FALSE, |
|
902 |
- d_f = NA) { |
|
903 |
- ## d_f only used for the shiny app |
|
904 |
- if (is.na(d_f)) { |
|
905 |
- d_f <- read_array_visualize(infile) |
|
906 |
- |
|
907 |
- d_f <- d_f %>% |
|
908 |
- group_by(Block) %>% |
|
909 |
- mutate( |
|
910 |
- meanX = mean(X), |
|
911 |
- meanY = mean(Y), |
|
912 |
- maxY = max(Y), |
|
913 |
- maxX = max(X), |
|
914 |
- minY = min(Y), |
|
915 |
- minX = min(X) |
|
916 |
- ) |
|
917 |
- } |
|
918 |
- ## define mid points to put the block labels |
|
919 |
- MFI_var_sys <- rlang::sym(MFI_var) |
|
920 |
- mid <- median(log(d_f[[MFI_var]])) |
|
921 |
- labels <- sprintf( |
|
922 |
- "<b>%s</b><br> MFI= %s ", |
|
923 |
- d_f$Name, |
|
924 |
- formatC(d_f$`F635 Median`, format = "d", big.mark = ",") |
|
925 |
- ) %>% |
|
926 |
- lapply(htmltools::HTML) |
|
927 |
- |
|
928 |
- point_size <- 0.5 |
|
929 |
- if (interactive == FALSE) |
|
930 |
- point_size <- 1 |
|
931 |
- ## plot the visual slide |
|
932 |
- p <- ggplot(d_f, aes(x = X, y = -Y, text = labels)) + |
|
933 |
- #geom_rect(aes(xmin = minX, xmax = maxX, ymin = -minY, ymax = -maxY),color = "black",alpha=0.0001,fill="blue") + |
|
934 |
- geom_point(size = point_size, aes_string(colour = sprintf("log(`%s`)", MFI_var))) + |
|
935 |
- theme_void() + |
|
936 |
- theme(legend.position = "none") + |
|
937 |
- scale_color_gradient2( |
|
938 |
- midpoint = mid, |
|
939 |
- low = "blue", |
|
940 |
- mid = "white", |
|
941 |
- high = "red", |
|
942 |
- space = "Lab" |
|
943 |
- ) + |
|
944 |
- geom_text(aes( |
|
945 |
- x = meanX, |
|
946 |
- y = -meanY, |
|
947 |
- label = paste("Block", Block) |
|
948 |
- ), |
|
949 |
- color = "black", |
|
950 |
- size = 2) |
|
951 |
- |
|
952 |
- if (interactive == FALSE) { |
|
953 |
- return(p) |
|
954 |
- } else if (interactive == TRUE) { |
|
955 |
- p <- ggplotly(p, tooltip = 'text') |
|
956 |
- return(p) |
|
957 |
- } |
|
958 |
- } |
|
959 |
- |
|
960 |
- |
|
961 |
-#' Visualize the slide mimicking the original scan image using a 2d plot. |
|
962 |
-#' |
|
963 |
-#' @param infile - a .gpr file to be used to visualize the expression intensities of the slide spots |
|
964 |
-#' @param MFI_var the MFI variable to plot, can be either the background or foreground value |
|
965 |
-#' @param d_f a data frame with array data |
|
966 |
-#' |
|
967 |
-#' @return A 2d plot of either the background or foreground values |
|
968 |
-#' @export |
|
969 |
-#' @import ggplot2 |
|
970 |
-#' @examples |
|
971 |
-#' ## Not run: |
|
972 |
-#' visualize_slide_2d( |
|
973 |
-#' infile = system.file("extdata", "/array_data/machine1/KK2-06.txt", package="protGear"), |
|
974 |
-#' MFI_var = "B635 Median" |
|
975 |
-#' ) |
|
976 |
-#' ## End(Not run) |
|
977 |
-visualize_slide_2d <- function(infile, MFI_var , d_f = NA) { |
|
978 |
- if (is.na(d_f)) { |
|
979 |
- d_f <- read_array_visualize(infile) |
|
980 |
- |
|
981 |
- d_f <- d_f %>% |
|
982 |
- group_by(Block) %>% |
|
983 |
- mutate( |
|
984 |
- meanX = mean(X), |
|
985 |
- meanY = mean(Y), |
|
986 |
- maxY = max(Y), |
|
987 |
- maxX = max(X), |
|
988 |
- minY = min(Y), |
|
989 |
- minX = min(X) |
|
990 |
- ) |
|
991 |
- } |
|
992 |
- |
|
993 |
- |
|
994 |
- mid <- median(log(d_f[[MFI_var]])) |
|
995 |
- ggplot(data = d_f, |
|
996 |
- aes_string( |
|
997 |
- x = 'X', |
|
998 |
- y = sprintf("-%s", 'Y'), |
|
999 |
- #-Y, |
|
1000 |
- z = sprintf("log(`%s`)", MFI_var) |
|
1001 |
- )) + #log(`F635 Median`) |
|
1002 |
- ## we have 24 vs 8 per block |
|
1003 |
- #stat_summary_2d(fun = median ,binwidth = c(40,120)) + |
|
1004 |
- stat_summary_2d(fun = median) + |
|
1005 |
- scale_fill_gradient2( |
|
1006 |
- midpoint = mid, |
|
1007 |
- low = "blue", |
|
1008 |
- mid = "white", |
|
1009 |
- high = "red", |
|
1010 |
- space = "Lab" |
|
1011 |
- ) + |
|
1012 |
- theme_void() + |
|
1013 |
- theme(legend.position = "none") + |
|
1014 |
- geom_text(aes( |
|
1015 |
- x = meanX, |
|
1016 |
- y = -meanY, |
|
1017 |
- label = paste("Block", Block) |
|
1018 |
- ), |
|
1019 |
- color = "black", |
|
1020 |
- size = 4) |
|
1021 |
-} |
1022 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "7E08C132", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/R/cv_estimation_tag_functions.R", |
|
4 |
- "project_path": "R/cv_estimation_tag_functions.R", |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647521308629.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 12, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "326,0", |
|
16 |
- "scrollLine": "0" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1647521319, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1647521319203, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,326 +0,0 @@ |
1 |
-#' Coefficient of Variation |
|
2 |
-#' @title cv_estimation |
|
3 |
-#' |
|
4 |
-#' @param dataC A dataset a data frame with feature variables to be used |
|
5 |
-#' @param lab_replicates A numeric value indicating the number of lab replicates. |
|
6 |
-#' @param sampleID_var A character string containing the name of the sample identifier variable. Default set to 'sampleID' |
|
7 |
-#' @param antigen_var A character string containing the name of the features/protein variable. Default to 'antigen' |
|
8 |
-#' @param replicate_var A character string containing the name of the replicate variable. Default to 'replicate' |
|
9 |
-#' @param mfi_var A character string containing the name of the variable with MFI value.Assuming background correction is done already. |
|
10 |
-#' Default to 'FMedianBG_correct' |
|
11 |
-#' @param cv_cut_off Optional value indicating the cut off of flagging CV's. Default set at 20. |
|
12 |
-#' @description A function to calculate the CV for the technical lab replicates. The default values are set as per the object names generated by machine |
|
13 |
-#' @import dplyr tidyr |
|
14 |
-#' @importFrom rlang sym |
|
15 |
-#' @importFrom tidyr gather spread |
|
16 |
-#' @return A data frame where CV's of the replicates have been calculated |
|
17 |
-#' @export |
|
18 |
-#' |
|
19 |
-#' @examples |
|
20 |
-#' dataC <- readr::read_csv(system.file("extdata", "dataC.csv", package="protGear")) |
|
21 |
-#' ## this file has 3 lab replicates and the default names |
|
22 |
-#' cv_estimation(dataC ,lab_replicates=3) |
|
23 |
-cv_estimation <- |
|
24 |
- function(dataC , |
|
25 |
- lab_replicates , |
|
26 |
- sampleID_var = 'sampleID', |
|
27 |
- antigen_var = 'antigen' , |
|
28 |
- replicate_var = 'replicate' , |
|
29 |
- mfi_var = 'FMedianBG_correct' , |
|
30 |
- cv_cut_off = 20) { |
|
31 |
- if (lab_replicates == 1) { |
|
32 |
- dataC <- dataC %>% |
|
33 |
- dplyr::filter( |
|
34 |
- !grepl( |
|
35 |
- '^[Ll][Aa][Nn][Dd][Mm][Aa][Rr][Kk]|^[Bb][Uu][Ff][Ff][Ee][Rr]', |
|
36 |
- !!antigen_var |
|
37 |
- ) |
|
38 |
- ) %>% |
|
39 |
- filter(replicate <= lab_replicates) |
|
40 |
- |
|
41 |
- # combine Name and replicate |
|
42 |
- #tidyr::unite(col=antigen,antigen,replicate) |
|
43 |
- # dataC <- dataC %>% |
|
44 |
- # dplyr::select(sampleID,sample_array_ID, antigen,iden,FMedianBG_correct) |
|
45 |
- ## get the name of the file |
|
46 |
- iden <- unique(dataC$iden) |
|
47 |
- # pick the values after the last underscore |
|
48 |
- ## to get the replicate |
|
49 |
- #dataC$replicate <- sub(".*_(.*)", "\\1", dataC$antigen) |
|
50 |
- |
|
51 |
- ## create a wide data to |
|
52 |
- if (length(unique(dataC$replicate)) > lab_replicates) { |
|
53 |
- try(stop("Some antigens seems to be repeated in a mini array for", iden) , outFile = stdout()) |
|
54 |
- error_replicates(iden) |
|
55 |
- } |
|
56 |
- |
|
57 |
- } else if (lab_replicates > 1) { |
|
58 |
- ## Exclude the land mark and Buffer |
|
59 |
- dataC <- dataC %>% |
|
60 |
- dplyr::filter( |
|
61 |
- !grepl( |
|
62 |
- '^[Ll][Aa][Nn][Dd][Mm][Aa][Rr][Kk]| ^[Bb][Uu][Ff][Ff][Ee][Rr]', |
|
63 |
- !!antigen_var |
|
64 |
- ) |
|
65 |
- ) %>% |
|
66 |
- dplyr::filter(!!sym(replicate_var) <= lab_replicates) |
|
67 |
- |
|
68 |
- #combine Name and replicate |
|
69 |
- #tidyr::unite(col=antigen,antigen,replicate) |
|
70 |
- |
|
71 |
- ## get the name of the file |
|
72 |
- iden <- unique(dataC$iden) |
|
73 |
- # pick the values after the last underscore |
|
74 |
- ## to get the replicate |
|
75 |
- # dataC$replicate <- sub(".*_(.*)", "\\1", dataC$antigen) |
|
76 |
- ### pick the nname of the antigen |
|
77 |
- ## values until the last underscore |
|
78 |
- #dataC$antigen <- sub("\\_[^\\_]*$" , "", dataC$antigen) |
|
79 |
- |
|
80 |
- ## create a wide data to |
|
81 |
- |
|
82 |
- if (length(unique(dataC$replicate)) > lab_replicates) { |
|
83 |
- try(stop("The replicates per antigen per sample are more than expected for ", |
|
84 |
- iden) , outFile = stdout()) |
|
85 |
- error_replicates(iden) |
|
86 |
- } else |
|
87 |
- (warning("The replicates are as expected per sample per antigen")) |
|
88 |
- |
|
89 |
- |
|
90 |
- ## reshaping the data |
|
91 |
- Data3 <- dataC %>% |
|
92 |
- dplyr::select(contains('iden'),contains('sample_array_ID'),!!sampleID_var, !!mfi_var, !!replicate_var,!!antigen_var) %>% |
|
93 |
- spread(replicate, FMedianBG_correct) |
|
94 |
- |
|
95 |
- |
|
96 |
- ### group the antigens and calculate their CV values |
|
97 |
- ## the CV values > 20, get the minimum mean of each of the two mfi values and use it to calculate CV |
|
98 |
- ## select the one with lowest CV |
|
99 |
- dataC <- dataC %>% |
|
100 |
- dplyr::group_by_at(c(antigen_var,sampleID_var)) %>% |
|
101 |
- ###mean and then for each grouping of 2 |
|
102 |
- summarize(meanX=mean(get(mfi_var) , na.rm=TRUE), |
|
103 |
- meanX2_X3=mean(get(mfi_var)[-1] , na.rm=TRUE), |
|
104 |
- meanX1_X3=mean(get(mfi_var)[-2] , na.rm=TRUE), |
|
105 |
- meanX1_X2=mean(get(mfi_var)[-3] , na.rm=TRUE), |
|
106 |
- ###standard deviation and then for each grouping of 2 |
|
107 |
- sdX=round(sd(get(mfi_var) , na.rm=TRUE),2), |
|
108 |
- sdX2_X3=round(sd(get(mfi_var)[-1] , na.rm=TRUE),2), |
|
109 |
- sdX1_X3=round(sd(get(mfi_var)[-2] , na.rm=TRUE),2), |
|
110 |
- sdX1_X2=round(sd(get(mfi_var)[-3] , na.rm=TRUE),2), |
|
111 |
- ### cv |
|
112 |
- CVX=(round(sdX/meanX,4))*100 , |
|
113 |
- CVX2_X3=(round(sdX2_X3/meanX2_X3,4))*100 , |
|
114 |
- CVX1_X3=(round(sdX1_X3/meanX1_X3,4))*100 , |
|
115 |
- CVX1_X2=(round(sdX1_X2/meanX1_X2,4))*100) %>% |
|
116 |
- mutate(cvCat_all = ifelse(CVX>=0 & CVX<=cv_cut_off , paste0("CV <= ",cv_cut_off), |
|
117 |
- ifelse(CVX >cv_cut_off & CVX <101, paste0("CV > ",cv_cut_off),"Others"))) %>% |
|
118 |
- mutate(cvSelected_all = ifelse(CVX>=0 & CVX<=cv_cut_off , CVX, |
|
119 |
- ifelse(CVX >cv_cut_off | CVX<=0 ,pmin(CVX2_X3, CVX1_X3,CVX1_X2 , na.rm = TRUE),NA))) |
|
120 |
- dataC <- dataC %>% |
|
121 |
- left_join(Data3, by=c(antigen_var,sampleID_var)) %>% |
|
122 |
- select(antigen, sampleID, sample_array_ID,everything()) |
|
123 |
- } |
|
124 |
- return(dataC) |
|
125 |
- } |
|
126 |
- |
|
127 |
- |
|
128 |
-#' Select set of replicates with the best CV |
|
129 |
-#' @title best CV estimation |
|
130 |
-#' |
|
131 |
-#' @param dataCV A data frame |
|
132 |
-#' @param slide_id A character string containing the identifier of the data frame variable. |
|
133 |
-#' @param cv_cut_off a numeric value for the CV cut off. Should be between 0-100 |
|
134 |
-#' @param lab_replicates A numeric value indicating the number of lab replicates. |
|
135 |
-#' |
|
136 |
-#' @description A function to select the best CV by combining the replicates in duplicates. The function has been build for up to to 3 replicates so far |
|
137 |
-#' @import dplyr tidyr |
|
138 |
-#' @importFrom tidyr gather spread |
|
139 |
-#' @importFrom dplyr mutate select |
|
140 |
-#' @importFrom readr read_csv |
|
141 |
-#' @importFrom plyr . |
|
142 |
-#' @return A data frame with the best CV's estimated |
|
143 |
-#' @export |
|
144 |
-#' |
|
145 |
-#' @examples |
|
146 |
-#' dataC <- readr::read_csv(system.file("extdata", "dataC.csv", package="protGear")) |
|
147 |
-#' ## this file has 3 lab replicates and the default names |
|
148 |
-#' dataCV <- cv_estimation(dataC ,lab_replicates=3) |
|
149 |
-#' best_CV_estimation(dataCV,slide_id = "iden", lab_replicates = 3, cv_cut_off = 20) |
|
150 |
-best_CV_estimation <- |
|
151 |
- function(dataCV, |
|
152 |
- slide_id, |
|
153 |
- lab_replicates , |
|
154 |
- cv_cut_off) { |
|
155 |
- if (lab_replicates > 1) { |
|
156 |
- ### Get the mean that corresponds to the lowest CV |
|
157 |
- iden <- unique(dataCV[[slide_id]]) |
|
158 |
- data_best_CV <- as.data.frame(dataCV) |
|
159 |
- |
|
160 |
- ## changing NaN values to 0 to facilitate computation |
|
161 |
- is.nan.data.frame <- |
|
162 |
- function(x) |
|
163 |
- do.call(cbind, lapply(x, is.nan)) |
|
164 |
- data_best_CV[is.nan.data.frame(data_best_CV)] <- 0 |
|
165 |
- |
|
166 |
- ## get the minumum cv and put the value of the as a string variable |
|
167 |
- ## ie CVX3 selected meanX3 value will be created |
|
168 |
- ## might bring issues here if the subtraction has a NA or its missing for the prescan |
|
169 |
- data_best_CV <- data_best_CV %>% |
|
170 |
- mutate(x = colnames(.[, c("CVX2_X3", "CVX1_X3", "CVX1_X2")]) |
|
171 |
- [apply(.[, c("CVX2_X3", "CVX1_X3", "CVX1_X2")], 1, which.min)]) %>% |
|
172 |
- dplyr::mutate(xbar = paste0("meanX", gsub("CVX", "", x))) |
|
173 |
- |
|
174 |
- ## get the actual value of the mean that corresponds to that |
|
175 |
- #http://stackoverflow.com/questions/43762869/get-row-value-corresponding-to-a-column-name |
|
176 |
- data_best_CV <- data_best_CV %>% |
|
177 |
- mutate(row = 1:n()) %>% |
|
178 |
- # select(-c(`1`,`2`,`3`,iden)) %>% |
|
179 |
- gather(prop, val, meanX1_X2:meanX2_X3) %>% |
|
180 |
- group_by(row) %>% |
|
181 |
- mutate(selected = val[xbar == prop]) %>% |
|
182 |
- spread(prop, val) %>% dplyr::select(-row) |
|
183 |
- |
|
184 |
- |
|
185 |
- ##create the final selected mean in the data set |
|
186 |
- data_best_CV <- data_best_CV %>% |
|
187 |
- mutate( |
|
188 |
- meanSelected = ifelse( |
|
189 |
- CVX >= 0 & CVX <= cv_cut_off , |
|
190 |
- meanX, |
|
191 |
- ifelse((CVX > cv_cut_off & |
|
192 |
- CVX < 101) | CVX <= 0 , selected, NA) |
|
193 |
- ), |
|
194 |
- mean_best_CV = selected |
|
195 |
- ) %>% |
|
196 |
- dplyr::select(-xbar, -selected) |
|
197 |
- data_best_CV <- data_best_CV %>% |
|
198 |
- mutate( |
|
199 |
- best_CV = pmin(CVX2_X3, CVX1_X3, CVX1_X2, na.rm = TRUE) , |
|
200 |
- best_CV_cat = ifelse( |
|
201 |
- best_CV >= 0 & best_CV <= cv_cut_off , |
|
202 |
- paste0("CV <= ", cv_cut_off), |
|
203 |
- ifelse( |
|
204 |
- best_CV > cv_cut_off & |
|
205 |
- best_CV < 101, |
|
206 |
- paste0("CV > ", cv_cut_off), |
|
207 |
- "Others" |
|
208 |
- ) |
|
209 |
- ) |
|
210 |
- ) |
|
211 |
- |
|
212 |
- } else if (lab_replicates == 1) { |
|
213 |
- ### Get the mean that corresponds to the lowest CV |
|
214 |
- iden <- unique(dataCV[[slide_id]]) |
|
215 |
- data_best_CV <- as.data.frame(dataCV) |
|
216 |
- |
|
217 |
- ## changing NaN values to 0 to facilitate computation |
|
218 |
- is.nan.data.frame <- |
|
219 |
- function(x) |
|
220 |
- do.call(cbind, lapply(x, is.nan)) |
|
221 |
- data_best_CV[is.nan.data.frame(data_best_CV)] <- 0 |
|
222 |
- |
|
223 |
- ### generate the variabrl |
|
224 |
- data_best_CV <- data_best_CV %>% |
|
225 |
- mutate(mean_best_CV = FMedianBG_correct) |
|
226 |
- } |
|
227 |
- |
|
228 |
- ## return the dataset of interest |
|
229 |
- return(data_best_CV) |
|
230 |
- } |
|
231 |
-#' |
|
232 |
-#' |
|
233 |
-#' \\\_End_Function_\\\ # |
|
234 |
-#' |
|
235 |
-#' |
|
236 |
- |
|
237 |
- |
|
238 |
- |
|
239 |
- |
|
240 |
-#' Subtract the purification TAG data |
|
241 |
-#' @title tag_subtract |
|
242 |
-#' |
|
243 |
-#' @param tag_antigens A character vector with the names of proteins or antigens used as TAG. |
|
244 |
-#' @param mean_best_CV_var A character string containing the identifier of the variable with the MFI values. |
|
245 |
-#' @param dataC_mfi A dataframe |
|
246 |
-#' @param sampleID_var A character string containing the name of the sample identifier variable. Default set to 'sampleID' |
|
247 |
-#' @param antigen_var A character string containing the name of the features/protein variable. Default to 'antigen' |
|
248 |
-#' @param batch_vars A list of characters identifying variables in dataC_mfi for indicating batch. |
|
249 |
-#' @param tag_file A data frame with variables \code{antigen, TAG, TAG_name } to show the TAG for the different antigens or proteins in dataC_mfi |
|
250 |
-#' @import dplyr tidyr |
|
251 |
-#' @importFrom tidyr gather spread |
|
252 |
-#' @importFrom rlang := !! UQ |
|
253 |
-#' @return A data frame of TAG values subtracted |
|
254 |
-#' @export |
|
255 |
-#' |
|
256 |
-#' @examples |
|
257 |
-#' tag_file <- readr::read_csv(system.file("extdata", "TAG_antigens.csv", package="protGear")) |
|
258 |
-#' tag_antigens <- c("CD4TAG", "GST", "MBP") |
|
259 |
-#' batch_vars <- list(machine = "m1", day = "0520") |
|
260 |
-#' dataC <- readr::read_csv(system.file("extdata", "dataC.csv", package="protGear")) |
|
261 |
-#' ## this file has 3 lab replicates and the default names |
|
262 |
-#' dataCV <- cv_estimation(dataC ,lab_replicates=3) |
|
263 |
-#' dataCV_best2 <- best_CV_estimation(dataCV,slide_id = "iden", lab_replicates = 3, cv_cut_off = 20) |
|
264 |
-#' tag_subtract(dataCV_best2,tag_antigens=tag_antigens, mean_best_CV_var="mean_best_CV", |
|
265 |
-#' tag_file = tag_file,antigen_var = "antigen", batch_vars = batch_vars) |
|
266 |
-tag_subtract <- |
|
267 |
- function(dataC_mfi, |
|
268 |
- tag_antigens, |
|
269 |
- mean_best_CV_var, |
|
270 |
- tag_file, |
|
271 |
- batch_vars, |
|
272 |
- sampleID_var = 'sampleID', |
|
273 |
- antigen_var = 'antigen') { |
|
274 |
- mean_best_CV_var <- rlang::sym(paste0(mean_best_CV_var)) |
|
275 |
- sampleID_var <- rlang::sym(paste0(sampleID_var)) |
|
276 |
- antigen_var <- rlang::sym(paste0(antigen_var)) |
|
277 |
- ## remove the tag for the data |
|
278 |
- |
|
279 |
- dataC_mfi_tags <- dataC_mfi %>% |
|
280 |
- dplyr::filter(UQ(antigen_var) %in% tag_antigens) %>% |
|
281 |
- ungroup() %>% |
|
282 |
- dplyr::select(!!sampleID_var,!!antigen_var ,!!mean_best_CV_var) %>% |
|
283 |
- ##change all the negative TAG values to zero |
|
284 |
- mutate(!!mean_best_CV_var := ifelse(!!mean_best_CV_var < 0, 0, !!mean_best_CV_var)) %>% |
|
285 |
- spread(!!antigen_var ,!!mean_best_CV_var) |
|
286 |
- |
|
287 |
- ## join the data with the fused antigen name for subtraction |
|
288 |
- dataC_mfi <- |
|
289 |
- left_join(dataC_mfi , tag_file, by = paste(antigen_var)) |
|
290 |
- |
|
291 |
- dataC_mfi <- dataC_mfi %>% ungroup() %>% |
|
292 |
- dplyr::select( |
|
293 |
- !!sampleID_var , |
|
294 |
- contains('sample_array_ID'), |
|
295 |
- !!sampleID_var , |
|
296 |
- TAG, |
|
297 |
- everything() , |
|
298 |
- -row |
|
299 |
- ) |
|
300 |
- dataC_mfi <- dataC_mfi %>% ungroup() %>% |
|
301 |
- left_join(x = dataC_mfi , |
|
302 |
- y = dataC_mfi_tags , |
|
303 |
- by = paste(sampleID_var)) %>% |
|
304 |
- mutate(TAG_mfi = NA) |
|
305 |
- vars_in <- names(dataC_mfi) |
|
306 |
- |
|
307 |
- |
|
308 |
- ## mutate the tag_mfi variable |
|
309 |
- for (i in tag_antigens) { |
|
310 |
- tag_var <- rlang::sym(paste0(i)) |
|
311 |
- dataC_mfi <- dataC_mfi %>% |
|
312 |
- mutate(TAG_mfi = ifelse(TAG_name == i & |
|
313 |
- is.na(TAG_mfi), !!tag_var, TAG_mfi)) |
|
314 |
- } |
|
315 |
- |
|
316 |
- ## subtract the TAG values depending on the TAG of |
|
317 |
- mean_best_CV_tag_var <- |
|
318 |
- rlang::sym(paste0(mean_best_CV_var, "_tag")) |
|
319 |
- dataC_mfi <- dataC_mfi %>% |
|
320 |
- mutate(TAG_mfi = ifelse(is.na(TAG_mfi), 0, TAG_mfi)) %>% |
|
321 |
- mutate(!!mean_best_CV_tag_var := !!mean_best_CV_var - TAG_mfi) %>% |
|
322 |
- dplyr::select(c(vars_in, paste(mean_best_CV_tag_var))) %>% |
|
323 |
- ## add the batch variables to the data |
|
324 |
- mutate(machine = batch_vars[["machine"]] , day = batch_vars[["day"]]) |
|
325 |
- return(dataC_mfi) |
|
326 |
- } |
327 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,24 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "9120B816", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/git_bioc_link.txt", |
|
4 |
- "project_path": null, |
|
5 |
- "type": "text", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647626123539.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 14, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source" |
|
15 |
- }, |
|
16 |
- "folds": "", |
|
17 |
- "lastKnownWriteTime": 1643058003, |
|
18 |
- "encoding": "UTF-8", |
|
19 |
- "collab_server": "", |
|
20 |
- "source_window": "", |
|
21 |
- "last_content_update": 1643058003, |
|
22 |
- "read_only": false, |
|
23 |
- "read_only_alternatives": [] |
|
24 |
-} |
|
25 | 0 |
\ No newline at end of file |
5 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "991A0EA2", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/R/normalisation_functions.R", |
|
4 |
- "project_path": "R/normalisation_functions.R", |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1644591162172.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 12, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "3,27", |
|
16 |
- "scrollLine": "0" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1647580101, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1647580101, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,627 +0,0 @@ |
1 |
-#' Normalize Arrays |
|
2 |
-#' |
|
3 |
-#' @param matrix_antigen An object of class matrix with features/proteins as columns and samples as the rows |
|
4 |
-#' @param method character string specifying the normalization method. Choices are \code{"none","log2","vsn","cyclic_loess"} |
|
5 |
-#' \code{"cyclic_loess_log" ,"rlm"} |
|
6 |
-#' @param batch_correct A logical value indicating whether batch correction should be done or not |
|
7 |
-#' @param batch_var1 A character or factor vector of size similar to rows of \code{matrix_antigen} indicating the first batch. |
|
8 |
-#' @param batch_var2 A character or factor vector of size similar to rows of \code{matrix_antigen} indicating the second batch. |
|
9 |
-#' @param return_plot A logical value indicating whether a plot is returned to show the results of normalisation. |
|
10 |
-#' @param control_antigens logical vector specifying the subset of spots which are non-differentially-expressed control spots, |
|
11 |
-#' for use with \code{method="rlm"} |
|
12 |
-#' @param array_matrix An object of class dataframe or matrix used with \code{method='rlm'} indicating the sample index and |
|
13 |
-#' @param plot_by_antigen Logical to indicate whether to plot by antigen or not |
|
14 |
-#' slide name for the matrix_antigen object. |
|
15 |
-#' @import limma tibble vsn |
|
16 |
-#' @return A data frame of normalised values |
|
17 |
-#' @export |
|
18 |
-#' |
|
19 |
-#' @examples |
|
20 |
-#' matrix_antigen <- readr::read_csv(system.file("extdata", "matrix_antigen.csv", package="protGear")) |
|
21 |
-#' #VSN |
|
22 |
-#' normlise_vsn <- matrix_normalise(as.matrix(matrix_antigen), |
|
23 |
-#' method = "vsn", |
|
24 |
-#' return_plot = TRUE |
|
25 |
-#' ) |
|
26 |
-#' ## log |
|
27 |
-#' normlise_log <- matrix_normalise(as.matrix(matrix_antigen), |
|
28 |
-#' method = "log2", |
|
29 |
-#' return_plot = TRUE |
|
30 |
-#' ) |
|
31 |
-#' ## cyclic_loess_log |
|
32 |
-#' normlise_cylic_log <- matrix_normalise(as.matrix(matrix_antigen), |
|
33 |
-#' method = "cyclic_loess_log", |
|
34 |
-#' return_plot = TRUE |
|
35 |
-#' ) |
|
36 |
-matrix_normalise <- |
|
37 |
- function(matrix_antigen, |
|
38 |
- method = "log2", |
|
39 |
- batch_correct = FALSE, |
|
40 |
- batch_var1, |
|
41 |
- batch_var2 = day_batches, |
|
42 |
- return_plot = FALSE, |
|
43 |
- plot_by_antigen = TRUE, |
|
44 |
- control_antigens = NULL, |
|
45 |
- array_matrix = NULL) { |
|
46 |
- if (method == "log2") { |
|
47 |
- exprs_log <- matrix_antigen |
|
48 |
- ## since log can handle negative values , we convert all the negative values to a constant value |
|
49 |
- # exprs_log[exprs_log<0] <- 1 |
|
50 |
- |
|
51 |
- ## any value that is negative is allocated the smallest value in the batch |
|
52 |
- min_pos <- minpositive(exprs_log) |
|
53 |
- exprs_log <- replace(exprs_log, exprs_log < 1, min_pos) |
|
54 |
- exprs_normalised <- log2(exprs_log) |
|
55 |
- |
|
56 |
- |
|
57 |
- } else if (method == "none") { |
|
58 |
- exprs_normalised <- matrix_antigen |
|
59 |
- |
|
60 |
- } else if (method == "vsn") { |
|
61 |
- ## this approach utilises the VSN package |
|
62 |
- ## structure the data to be normalised |
|
63 |
- ## convert it to an Expression Set |
|
64 |
- exprs_antigen <- |
|
65 |
- Biobase::ExpressionSet(assayData = matrix_antigen) |
|
66 |
- |
|
67 |
- if (batch_correct == FALSE) { |
|
68 |
- ## normalise without adjusting for the strata on VSN |
|
69 |
- data_points <- dim(matrix_antigen)[[1]] |
|
70 |
- exprs_normalised <- vsn::justvsn(x = exprs_antigen , |
|
71 |
- minDataPointsPerStratum = data_points) |
|
72 |
- } else if (batch_correct == TRUE) { |
|
73 |
- exprs_normalised <- |
|
74 |
- vsn::justvsn( |
|
75 |
- x = exprs_antigen , |
|
76 |
- strata = machines , |
|
77 |
- minDataPointsPerStratum = data_points |
|
78 |
- ) |
|
79 |
- } |
|
80 |
- |
|
81 |
- |
|
82 |
- } else if (method == "cyclic_loess") { |
|
83 |
- exprs_normalised <- limma::normalizeCyclicLoess( |
|
84 |
- matrix_antigen, |
|
85 |
- weights = NULL, |
|
86 |
- span = 0.7, |
|
87 |
- iterations = 3, |
|
88 |
- method = "fast" |
|
89 |
- ) |
|
90 |
- } else if (method == "cyclic_loess_log") { |
|
91 |
- exprs_normalised <- limma::normalizeCyclicLoess( |
|
92 |
- matrix_antigen, |
|
93 |
- weights = NULL, |
|
94 |
- span = 0.7, |
|
95 |
- iterations = 3, |
|
96 |
- method = "fast" |
|
97 |
- ) |
|
98 |
- ## any value that is negative is allocated the smallest value in the batch |
|
99 |
- min_pos <- minpositive(exprs_normalised) |
|
100 |
- exprs_normalised <- |
|
101 |
- replace(exprs_normalised, exprs_normalised < 1, min_pos) |
|
102 |
- exprs_normalised <- log2(exprs_normalised) |
|
103 |
- } else if (method == "rlm") { |
|
104 |
- if (!is.null(control_antigens)) { |
|
105 |
- exprs_df <- |
|
106 |
- rlm_normalise_matrix( |
|
107 |
- matrix_antigen = matrix_antigen, |
|
108 |
- array_matrix = array_matrix, |
|
109 |
- control_antigens = control_antigens |
|
110 |
- ) |
|
111 |
- exprs_normalised <- rlm_normalise(exprs_df) |
|
112 |
- exprs_normalised <- exprs_normalised %>% |
|
113 |
- gather(variable, value,-(antigen:sampleID2)) %>% |
|
114 |
- unite(temp, antigen) %>% select(-variable) %>% |
|
115 |
- spread(temp, value) %>% |
|
116 |
- column_to_rownames(var = "sampleID2") |
|
117 |
- } else if (is.null(control_antigens) | is.null(array_matrix)) { |
|
118 |
- stop("Specify the control antigens or array_matrix to use RLM") |
|
119 |
- } |
|
120 |
- } |
|
121 |
- |
|
122 |
- cv_val <- |
|
123 |
- round(sd(as.matrix(exprs_normalised), na.rm = TRUE) / mean(as.matrix(exprs_normalised), na.rm = TRUE), 4) * |
|
124 |
- 100 |
|
125 |
- cv_val <- paste(cv_val, "%", sep = "") |
|
126 |
- #exprs_normalised_df <- as.data.frame(exprs_normalised) |
|
127 |
- |
|
128 |
- ## return the data in the structure as it was supplied |
|
129 |
- if (method == "vsn") { |
|
130 |
- ## this kept if a different structure is considered |
|
131 |
- matrix_antigen_normalised <- t(as.data.frame(exprs_normalised)) |
|
132 |
- row.names(matrix_antigen_normalised) <- |
|
133 |
- as.numeric(gsub("X", "", row.names(matrix_antigen_normalised))) |
|
134 |
- matrix_antigen_normalised <- |
|
135 |
- as.data.frame(matrix_antigen_normalised) |
|
136 |
- } else{ |
|
137 |
- matrix_antigen_normalised <- as.data.frame(exprs_normalised) |
|
138 |
- #matrix_antigen_normalised <- exprs_normalised_df |
|
139 |
- } |
|
140 |
- |
|
141 |
- ## Whether to plot by sample or antigen |
|
142 |
- if (plot_by_antigen == TRUE) { |
|
143 |
- plot_normalisation <- |
|
144 |
- plot_normalised_antigen( |
|
145 |
- exprs_normalised_df = matrix_antigen_normalised, |
|
146 |
- method = method, |
|
147 |
- batch_correct = batch_correct |
|
148 |
- ) |
|
149 |
- } else{ |
|
150 |
- plot_normalisation <- |
|
151 |
- plot_normalised( |
|
152 |
- exprs_normalised_df = matrix_antigen_normalised, |
|
153 |
- method = method, |
|
154 |
- batch_correct = batch_correct |
|
155 |
- ) |
|
156 |
- } |
|
157 |
- |
|
158 |
- |
|
159 |
- |
|
160 |
- if (return_plot == TRUE) { |
|
161 |
- return( |
|
162 |
- list( |
|
163 |
- matrix_antigen_normalised = matrix_antigen_normalised, |
|
164 |
- plot_normalisation = plot_normalisation |
|
165 |
- ) |
|
166 |
- ) |
|
167 |
- } else { |
|
168 |
- return(matrix_antigen_normalised) |
|
169 |
- } |
|
170 |
- |
|
171 |
- } |
|
172 |
- |
|
173 |
- |
|
174 |
- |
|
175 |
- |
|
176 |
- |
|
177 |
- |
|
178 |
- |
|
179 |
- |
|
180 |
- |
|
181 |
-#' Nomrmalise using RLM |
|
182 |
-#' @param matrix_antigen A matrix with antigen data |
|
183 |
-#' @param array_matrix A matrix with control antigen data |
|
184 |
-#' @param control_antigens the control antigens for RLM normalisation |
|
185 |
-#' @description A function for \code{method='rlm'} from \code{\link{matrix_normalise}}. |
|
186 |
-#' @return A RLM normalised data frame |
|
187 |
-#' @import dplyr |
|
188 |
-#' @export |
|
189 |
-#' |
|
190 |
-#' @examples |
|
191 |
-#' matrix_antigen <- readr::read_csv(system.file("extdata", "matrix_antigen.csv", package="protGear")) |
|
192 |
-#' # rlm_normalise_matrix(matrix_antigen=matrix_antigen, array_matrix=array_matrix, |
|
193 |
-#' # control_antigens=control_antigens) |
|
194 |
-rlm_normalise_matrix <- |
|
195 |
- function(matrix_antigen, |
|
196 |
- array_matrix, |
|
197 |
- control_antigens) { |
|
198 |
- array_matrix$sample_index <- |
|
199 |
- as.character(array_matrix$sample_index) |
|
200 |
- |
|
201 |
- array_matrix <- array_matrix %>% |
|
202 |
- group_by(slide) %>% |
|
203 |
- mutate(Array = group_indices()) |
|
204 |
- |
|
205 |
- rlm_normalise_df <- as.data.frame.matrix(matrix_antigen) %>% |
|
206 |
- #dplyr::mutate(sample_index=row.names(matrix_antigen)) |
|
207 |
- rownames_to_column(var = "sample_index") |
|
208 |
- |
|
209 |
- |
|
210 |
- rlm_normalise_df <- rlm_normalise_df %>% |
|
211 |
- select(sample_index, everything()) %>% |
|
212 |
- gather(antigen, MFI_val, -sample_index) %>% |
|
213 |
- left_join(array_matrix, by = "sample_index") %>% |
|
214 |
- rename(Block = sample_array_ID) %>% |
|
215 |
- mutate(Block = as.numeric(Block)) |
|
216 |
- |
|
217 |
- |
|
218 |
- ## create a variable to indicate the control antigens |
|
219 |
- rlm_normalise_df <- rlm_normalise_df %>% |
|
220 |
- mutate(Description = ifelse(antigen %in% all_of(control_antigens) , "Control", "Sample")) |
|
221 |
- return(rlm_normalise_df) |
|
222 |
- } |
|
223 |
- |
|
224 |
- |
|
225 |
-#' RLM normalisation |
|
226 |
-#' |
|
227 |
-#' @param rlm_normalise_df rlm normalised data frame |
|
228 |
-#' @description A function for \code{method='rlm'} from \code{\link{matrix_normalise}}. |
|
229 |
-#' @return an elist of RLM normalisation to be utilised by \code{\link{rlm_normalise_matrix}} |
|
230 |
-#' @export |
|
231 |
-#' @keywords internal |
|
232 |
-#' @examples |
|
233 |
-#' matrix_antigen <- readr::read_csv(system.file("extdata", "matrix_antigen.csv", package="protGear")) |
|
234 |
-#' #rlm_normalise_df <- rlm_normalise_matrix(matrix_antigen=matrix_antigen, array_matrix=array_matrix, |
|
235 |
-#' # control_antigens=control_antigens) |
|
236 |
-#' # rlm_normalise(rlm_normalise_df) |
|
237 |
-rlm_normalise <- function(rlm_normalise_df) { |
|
238 |
- rlm_normalise_C <- rlm_normalise_df %>% |
|
239 |
- filter(grepl("Control", Description)) |
|
240 |
- rlm_normalise_E <- rlm_normalise_df %>% |
|
241 |
- filter(grepl("Sample", Description)) |
|
242 |
- |
|
243 |
- |
|
244 |
- ## create the matrix of the control antigens |
|
245 |
- rlm_normalise_C <- rlm_normalise_C %>% |
|
246 |
- select(Array, Block, antigen, MFI_val) %>% |
|
247 |
- spread(Array, MFI_val) |
|
248 |
- |
|
249 |
- ## create the matrix of the sample antigens |
|
250 |
- rlm_normalise_E <- rlm_normalise_E %>% |
|
251 |
- select(Array, Block, antigen, MFI_val) %>% |
|
252 |
- spread(Array, MFI_val) |
|
253 |
- |
|
254 |
- |
|
255 |
- ## create Elist for the controls and samples |
|
256 |
- sample_elist <- list(E = rlm_normalise_E %>% |
|
257 |
- select(-c(Block, antigen)) , |
|
258 |
- genes = rlm_normalise_E %>% select(Block:antigen)) |
|
259 |
- |
|
260 |
- |
|
261 |
- controls_elist <- list(E = rlm_normalise_C %>% |
|
262 |
- select(-c(Block, antigen)) , |
|
263 |
- genes = rlm_normalise_C %>% |
|
264 |
- select(Block:antigen)) |
|
265 |
- |
|
266 |
- |
|
267 |
- ## changing less values to the lowest positivie MFI value |
|
268 |
- min_pos <- minpositive(controls_elist$E) |
|
269 |
- controls_elist$E <- |
|
270 |
- replace(controls_elist$E, controls_elist$E < 1, min_pos / 2) |
|
271 |
- |
|
272 |
- ## change the control EList to log2 --> |
|
273 |
- ## RLM reccommends |
|
274 |
- controls_elist$E <- as.matrix(controls_elist$E) |
|
275 |
- controls_elist$E <- log2(controls_elist$E) |
|
276 |
- rownames(controls_elist$E) <- controls_elist$genes$antigen |
|
277 |
- |
|
278 |
- contr_names <- unique(rownames(controls_elist$E)) |
|
279 |
- contr_names_len <- length(contr_names) |
|
280 |
- contr_mapping <- matrix(nrow = contr_names_len, ncol = 1) |
|
281 |
- rownames(contr_mapping) <- contr_names |
|
282 |
- contr_mapping[, 1] <- seq_len(contr_names_len) |
|
283 |
- |
|
284 |
- |
|
285 |
- ##the number of not control antigens |
|
286 |
- p_features <- nrow(sample_elist$E) |
|
287 |
- p_controls <- nrow(controls_elist$E) |
|
288 |
- n_arrays <- ncol(sample_elist$E) |
|
289 |
- n_blocks <- max(controls_elist$genes$Block , na.rm = TRUE) |
|
290 |
- y <- c(controls_elist$E) |
|
291 |
- |
|
292 |
- |
|
293 |
- ## should be contr_names_len - 3 ... check |
|
294 |
- ## should it be changed when the controls are more |
|
295 |
- dummies <- matrix(0, ncol = { |
|
296 |
- n_arrays + n_blocks + contr_names_len - 3 |
|
297 |
- }, nrow = length(y)) |
|
298 |
- |
|
299 |
- |
|
300 |
- rnames <- vector(length = length(y)) |
|
301 |
- a_cols <- paste("a", 1:{ |
|
302 |
- n_arrays - 1 |
|
303 |
- }, sep = "") |
|
304 |
- |
|
305 |
- |
|
306 |
- b_cols <- paste("b", 1:{ |
|
307 |
- n_blocks - 1 |
|
308 |
- }, sep = "") |
|
309 |
- |
|
310 |
- |
|
311 |
- |
|
312 |
- ####changed |
|
313 |
- t_cols <- paste("t", 1:{ |
|
314 |
- contr_names_len - 1 |
|
315 |
- }, sep = "") |
|
316 |
- |
|
317 |
- |
|
318 |
- |
|
319 |
- colnames(dummies) <- c(a_cols, b_cols, t_cols) |
|
320 |
- |
|
321 |
- ## matrix to hold the parameters per array for each of the control antigen |
|
322 |
- a_params <- matrix(nrow = n_arrays, ncol = 2) |
|
323 |
- rownames(a_params) <- colnames(controls_elist$E) |
|
324 |
- |
|
325 |
- ## matrix to hold the parameters for the Blocks |
|
326 |
- b_params <- matrix(nrow = n_blocks, ncol = 2) |
|
327 |
- idx <- 1 |
|
328 |
- |
|
329 |
- ## putting values to the defined matrix |
|
330 |
- # fill the matrix with 1's in such manner that for each row |
|
331 |
- # the combination of 1's and 0's is unique |
|
332 |
- for (i in seq_len(n_arrays)) { |
|
333 |
- a_tmp <- paste("a", i, sep = "") |
|
334 |
- a_params[i, 1] <- a_tmp |
|
335 |
- for (j in seq_len(p_controls)) { |
|
336 |
- rnames[idx] <- rownames(controls_elist$E)[j] |
|
337 |
- b_idx <- controls_elist$genes$Block[j] |
|
338 |
- b_tmp <- paste("b", b_idx, sep = "") |
|
339 |
- b_params[b_idx, 1] <- b_tmp |
|
340 |
- t_idx <- contr_mapping[rownames(controls_elist$E)[j], |
|
341 |
- 1] |
|
342 |
- t_tmp <- paste("t", t_idx, sep = "") |
|
343 |
- if (i == n_arrays) { |
|
344 |
- dummies[idx, a_cols] <- -1 |
|
345 |
- } |
|
346 |
- else { |
|
347 |
- dummies[idx, a_tmp] <- 1 |
|
348 |
- } |
|
349 |
- if (b_idx == n_blocks) { |
|
350 |
- dummies[idx, b_cols] <- -1 |
|
351 |
- } |
|
352 |
- else { |
|
353 |
- dummies[idx, b_tmp] <- 1 |
|
354 |
- } |
|
355 |
- if (t_idx == contr_names_len) { |
|
356 |
- dummies[idx, t_cols] <- -1 |
|
357 |
- } |
|
358 |
- else { |
|
359 |
- dummies[idx, t_tmp] <- 1 |
|
360 |
- } |
|
361 |
- idx <- idx + 1 |
|
362 |
- } |
|
363 |
- } |
|
364 |
- |
|
365 |
- ## bind with the control spots |
|
366 |
- dummies <- cbind(y, dummies) |
|
367 |
- rownames(dummies) <- rnames |
|
368 |
- |
|
369 |
- ## add the error term |
|
370 |
- e <- rnorm(length(y), 0, 1) |
|
371 |
- |
|
372 |
- ## run the model of controls and a matrix created out of controls |
|
373 |
- rlm_result <- MASS::rlm(y ~ . + e, data = data.frame(dummies)) |
|
374 |
- |
|
375 |
- ### PAA approach |
|
376 |
- a_params[-n_arrays, 2] <- |
|
377 |
- rlm_result$coefficients[a_params[-n_arrays, 1]] |
|
378 |
- a_params_sum <- sum(as.numeric(a_params[-n_arrays, 2])) |
|
379 |
- a_params[n_arrays, 2] <- -a_params_sum |
|
380 |
- |
|
381 |
- |
|
382 |
- b_params[-n_blocks, 2] <- |
|
383 |
- rlm_result$coefficients[b_params[-n_blocks, 1]] |
|
384 |
- b_params_sum <- sum(as.numeric(b_params[-n_blocks, 2])) |
|
385 |
- b_params[n_blocks, 2] <- -b_params_sum |
|
386 |
- |
|
387 |
- |
|
388 |
- a <- t(matrix(rep(as.numeric(a_params[, 2]), p_features), |
|
389 |
- ncol = p_features)) |
|
390 |
- |
|
391 |
- b <- matrix(mapply(function(i) { |
|
392 |
- as.numeric(b_params[sample_elist$genes$Block[i], 2]) |
|
393 |
- }, rep(seq_len(p_features), n_arrays)), ncol = n_arrays) |
|
394 |
- |
|
395 |
- |
|
396 |
- |
|
397 |
- ### added by ken - to convert all the negatives in the sample antigens to make them to the half of |
|
398 |
- ## the lowest positive |
|
399 |
- |
|
400 |
- min_pos <- minpositive(sample_elist$E) |
|
401 |
- sample_elist$E <- |
|
402 |
- replace(sample_elist$E, sample_elist$E < 1, min_pos / 2) |
|
403 |
- |
|
404 |
- ### |
|
405 |
- ## sample antigens matrix log 2 |
|
406 |
- sample_elist_log <- as.matrix(log2(sample_elist$E)) |
|
407 |
- |
|
408 |
- sample_elist_normalised <- sample_elist_log - a - b |
|
409 |
- row.names(sample_elist_normalised) <- |
|
410 |
- paste0(sample_elist$genes$Block, "_", sample_elist$genes$antigen) |
|
411 |
- |
|
412 |
- |
|
413 |
- sample_elist_normalised <- |
|
414 |
- as.data.frame(sample_elist_normalised) %>% |
|
415 |
- rownames_to_column("antigen_name") %>% |
|
416 |
- mutate( |
|
417 |
- Block = as.integer(sub('_.*$', '', antigen_name)) , |
|
418 |
- antigen = sub("[^_]*(.*)", "\\1", antigen_name), |
|
419 |
- antigen = sub('.', '', antigen) |
|
420 |
- ) %>% |
|
421 |
- select(Block, antigen_name, antigen, everything()) |
|
422 |
- |
|
423 |
- elist_normalised_df <- sample_elist_normalised %>% |
|
424 |
- gather(Array, meanBest2_RLM, -c("Block", "antigen_name", "antigen")) %>% |
|
425 |
- mutate(Array = as.numeric(Array)) %>% |
|
426 |
- right_join(rlm_normalise_df, by = c("antigen", "Block", "Array")) |
|
427 |
- |
|
428 |
- |
|
429 |
- elist_normalised_df <- |
|
430 |
- elist_normalised_df %>% group_by(sample_index, slide) |
|
431 |
- |
|
432 |
- ## add the sample ID variable |
|
433 |
- elist_normalised_df$sampleID2 <- |
|
434 |
- group_indices(.data = elist_normalised_df) |
|
435 |
- |
|
436 |
- ## |
|
437 |
- elist_normalised_df <- elist_normalised_df %>% |
|
438 |
- ungroup() %>% |
|
439 |
- select(antigen, sampleID2, meanBest2_RLM) |
|
440 |
- |
|
441 |
- return(elist_normalised_df) |
|
442 |
- |
|
443 |
-} |
|
444 |
- |
|
445 |
- |
|
446 |
-#' Trend test using Cox–Stuart (C–S) and Mann–Kendall (M–K) trend tests |
|
447 |
-#' |
|
448 |
-#' @param name Name of the test |
|
449 |
-#' @param p_val p value from the test |
|
450 |
-#' @param z_val the Z value of the test |
|
451 |
-#' |
|
452 |
-#' @return A statistics of mean standard deviation trend |
|
453 |
-#' @export |
|
454 |
-#' |
|
455 |
-#' @examples |
|
456 |
-#' output_trend_stats(name="t.test",p_val=0.001, z_val=5) |
|
457 |
-output_trend_stats <- function(name, p_val, z_val) { |
|
458 |
- if (p_val < 0.00001) { |
|
459 |
- prop <- "<0.00001" |
|
460 |
- z <- round(z_val, 4) |
|
461 |
- } else{ |
|
462 |
- prop <- as.character(round(p_val, 4)) |
|
463 |
- z <- round(z_val, 4) |
|
464 |
- } |
|
465 |
- output <- paste0(name, ' Z-val = ', z, ' P-val = ', prop) |
|
466 |
-} |
|
467 |
- |
|
468 |
- |
|
469 |
- |
|
470 |
-#' Comparison of normalised data by sample |
|
471 |
-#' |
|
472 |
-#' @param exprs_normalised_df a normalised data frame |
|
473 |
-#' @param method the method of normalisation used |
|
474 |
-#' @param batch_correct the batch correction |
|
475 |
-#' |
|
476 |
-#' @import dplyr ggplot2 |
|
477 |
-#' @importFrom readr read_csv |
|
478 |
-#' @importFrom Kendall MannKendall |
|
479 |
-#' @importFrom genefilter rowSds |
|
480 |
-#' @importFrom plyr . |
|
481 |
-#' @return A ggplot of normalised data |
|
482 |
-#' @export |
|
483 |
-#' |
|
484 |
-#' @examples |
|
485 |
-#' matrix_antigen <- readr::read_csv(system.file("extdata", "matrix_antigen.csv", package="protGear")) |
|
486 |
-#' normlise_vsn <- matrix_normalise(as.matrix(matrix_antigen), |
|
487 |
-#' method = "vsn", |
|
488 |
-#' return_plot = FALSE |
|
489 |
-#' ) |
|
490 |
-#' plot_normalised(normlise_vsn,method="vsn",batch_correct=FALSE) |
|
491 |
-plot_normalised <- |
|
492 |
- function(exprs_normalised_df, |
|
493 |
- method, |
|
494 |
- batch_correct) { |
|
495 |
- exprs_normalised_df_plot <- exprs_normalised_df %>% |
|
496 |
- dplyr::mutate( |
|
497 |
- mean_all_anti = rowMeans(., na.rm = TRUE), |
|
498 |
- stdev_all_anti = genefilter::rowSds(as.matrix(.), na.rm = TRUE) |
|
499 |
- ) %>% |
|
500 |
- dplyr::mutate( |
|
501 |
- rank_mean_all_anti = rank(mean_all_anti) , |
|
502 |
- method = method, |
|
503 |
- batch_correct = batch_correct |
|
504 |
- ) %>% |
|
505 |
- arrange(rank_mean_all_anti) |
|
506 |
- |
|
507 |
- # perform the trend test using the Cox–Stuart (C–S) and Mann–Kendall (M–K) trend tests for |
|
508 |
- #for the null hypothesis of no trend in the transformed standard deviations under several transformation |
|
509 |
- #cs_trend <- trend::cs.test(exprs_normalised_df_plot$stdev_all_anti) |
|
510 |
- # mk_trend <- trend::mk.test(exprs_normalised_df_plot$stdev_all_anti) |
|
511 |
- |
|
512 |
- mk_trend <- |
|
513 |
- Kendall::MannKendall(exprs_normalised_df_plot$stdev_all_anti) |
|
514 |
- |
|
515 |
- |
|
516 |
- |
|
517 |
- cs_stuart <- "Cox-Stuart" |
|
518 |
- m_kendall <- |
|
519 |
- output_trend_stats('Mann-Kendall (tau stats)', mk_trend$sl[[1]], mk_trend$tau[[1]]) |
|
520 |
- |
|
521 |
- |
|
522 |
- |
|
523 |
- normalisation_approaches <- c( |
|
524 |
- "Log2" = "log2", |
|
525 |
- "VSN" = "vsn", |
|
526 |
- "Cyclic Loess" = "cyclic_loess", |
|
527 |
- "Cyclic Loess (log)" = "cyclic_loess_log", |
|
528 |
- "RLM" = "rlm" |
|
529 |
- ) |
|
530 |
- norm_method <- names(which(normalisation_approaches == method)) |
|
531 |
- |
|
532 |
- p_norm <- |
|
533 |
- ggplot(exprs_normalised_df_plot , |
|
534 |
- aes(x = rank_mean_all_anti, y = stdev_all_anti)) + |
|
535 |
- geom_jitter(color = "red") + theme_classic() + |
|
536 |
- expand_limits(y = c(0, 10)) + stat_cor() + geom_smooth(color = 'blue', se = FALSE, size = |
|
537 |
- 0.5) + |
|
538 |
- ggtitle(paste(norm_method, "Normalisation")) + xlab("pooled mean rank (mean of features by sample)") + |
|
539 |
- ylab("pooled SD") + |
|
540 |
- labs(caption = paste0(m_kendall , "\n", cs_stuart)) + |
|
541 |
- theme(plot.caption = element_text( |
|
542 |
- hjust = 0, |
|
543 |
- color = "black", |
|
544 |
- face = "italic" |
|
545 |
- )) |
|
546 |
- return(p_norm) |
|
547 |
- } |
|
548 |
- |
|
549 |
- |
|
550 |
- |
|
551 |
-#' Comparison of normalised data by feature |
|
552 |
-#' |
|
553 |
-#' @param exprs_normalised_df a normalised data frame |
|
554 |
-#' @param method the method of normalisation used |
|
555 |
-#' @param batch_correct the batch correction |
|
556 |
-#' |
|
557 |
-#' @import dplyr |
|
558 |
-#' @importFrom Kendall MannKendall |
|
559 |
-#' @return A ggplot of various normalisation approaches |
|
560 |
-#' @export |
|
561 |
-#' |
|
562 |
-#' @examples |
|
563 |
-#' matrix_antigen <- readr::read_csv(system.file("extdata", "matrix_antigen.csv", package="protGear")) |
|
564 |
-#' normlise_vsn <- matrix_normalise(as.matrix(matrix_antigen), |
|
565 |
-#' method = "vsn", |
|
566 |
-#' return_plot = FALSE |
|
567 |
-#' ) |
|
568 |
-#' plot_normalised_antigen(normlise_vsn,method="vsn",batch_correct=FALSE) |
|
569 |
-plot_normalised_antigen <- |
|
570 |
- function(exprs_normalised_df, |
|
571 |
- method, |
|
572 |
- batch_correct) { |
|
573 |
- ## how can we combine these two functions and make them the same |
|
574 |
- antigen_summ <- exprs_normalised_df %>% |
|
575 |
- gather(antigen, MFI) %>% |
|
576 |
- group_by(antigen) %>% |
|
577 |
- dplyr::summarise(mean_mfi = mean(MFI, na.rm = TRUE), |
|
578 |
- sd_mfi = sd(MFI, na.rm = TRUE)) %>% |
|
579 |
- dplyr::mutate( |
|
580 |
- rank_mean_all_anti = rank(mean_mfi) , |
|
581 |
- method = method, |
|
582 |
- batch_correct = batch_correct |
|
583 |
- ) %>% |
|
584 |
- arrange(rank_mean_all_anti) |
|
585 |
- |
|
586 |
- # perform the trend test using the Cox–Stuart (C–S) and Mann–Kendall (M–K) trend tests for |
|
587 |
- #for the null hypothesis of no trend in the transformed standard deviations under several transformation |
|
588 |
- #cs_trend2 <- trend::cs.test(antigen_summ$sd_mfi) |
|
589 |
- #mk_trend2 <- trend::mk.test(antigen_summ$sd_mfi[!is.na(antigen_summ$sd_mfi)]) |
|
590 |
- |
|
591 |
- #cs_stuart2 <- output_trend_stats("Cox-Stuart",cs_trend2$p.value, cs_trend2$statistic) |
|
592 |
- #m_kendall2 <- output_trend_stats('Mann-Kendall',mk_trend2$p.value,mk_trend2$statistic ) |
|
593 |
- |
|
594 |
- ## Changed here to use Kendall packages |
|
595 |
- mk_trend2 <- |
|
596 |
- Kendall::MannKendall(antigen_summ$sd_mfi[!is.na(antigen_summ$sd_mfi)]) |
|
597 |
- cs_stuart2 <- |
|
598 |
- "Cox-Stuart" #output_trend_stats("Cox-Stuart",cs_trend2$p.value, cs_trend2$statistic[[1]]) |
|
599 |
- m_kendall2 <- |
|
600 |
- output_trend_stats('Mann-Kendall (tau stats)', mk_trend2$sl[[1]], mk_trend2$tau[[1]]) |
|
601 |
- |
|
602 |
- |
|
603 |
- normalisation_approaches <- c( |
|
604 |
- "Log2" = "log2", |
|
605 |
- "VSN" = "vsn", |
|
606 |
- "Cyclic Loess" = "cyclic_loess", |
|
607 |
- "Cyclic Loess (log)" = "cyclic_loess_log", |
|
608 |
- "RLM" = "rlm" |
|
609 |
- ) |
|
610 |
- norm_method <- names(which(normalisation_approaches == method)) |
|
611 |
- |
|
612 |
- p_norm <- |
|
613 |
- ggplot(antigen_summ , aes(x = rank_mean_all_anti, y = sd_mfi)) + |
|
614 |
- geom_jitter(color = "red") + theme_classic() + |
|
615 |
- expand_limits(y = c(0, 10)) + stat_cor() + geom_smooth(color = 'blue', se = FALSE, size = |
|
616 |
- 0.5) + |
|
617 |
- ggtitle(paste(norm_method, "Normalisation")) + xlab("Pooled mean rank (mean of features)") + |
|
618 |
- ylab("pooled SD") + |
|
619 |
- labs(caption = paste0(m_kendall2 , "\n", cs_stuart2)) + |
|
620 |
- theme(plot.caption = element_text( |
|
621 |
- hjust = 0, |
|
622 |
- color = "black", |
|
623 |
- face = "italic" |
|
624 |
- )) |
|
625 |
- |
|
626 |
- return(p_norm) |
|
627 |
- } |
628 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "A69B539A", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/R/global_functions.R", |
|
4 |
- "project_path": "R/global_functions.R", |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647521299135.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 11, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "7,16", |
|
16 |
- "scrollLine": "0" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1647580133, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1647580133, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,239 +0,0 @@ |
1 |
-##________________________________________________________________________### |
|
2 |
-###creating a pipe operator to negate the %in% operator |
|
3 |
-#https://cran.r-project.org/web/packages/magrittr/vignettes/magrittr.html |
|
4 |
-# http://stackoverflow.com/questions/24941080/meaning-of-symbol-in-r |
|
5 |
-"%ni%" <- Negate("%in%") |
|
6 |
- |
|
7 |
- |
|
8 |
-#' Get the minimum positive value |
|
9 |
-#' |
|
10 |
-#' @param x A numeric vector or variable |
|
11 |
-#' |
|
12 |
-#' @return Returns the minimum positive value in an object |
|
13 |
-#' @export |
|
14 |
-#' |
|
15 |
-#' @examples |
|
16 |
-#' minpositive(c(-1,-2,3,5,6,7,8,9,10)) |
|
17 |
-minpositive <- function(x) { |
|
18 |
- min(x[x > 0], na.rm = TRUE) |
|
19 |
-} |
|
20 |
- |
|
21 |
- |
|
22 |
- |
|
23 |
-#' List the array structure variables |
|
24 |
-#' |
|
25 |
-#' @param channel A character indicating the channel that the data was scanned at. It is mostly included in the MFI variable names. |
|
26 |
-#' @param totsamples A numeric value indicating teh number of samples on a slide. |
|
27 |
-#' @param blockspersample A numeric value indicating the numer of blocks in a mini-array. The \code{".gal"} file can help in getting this |
|
28 |
-#' @param chip_path A character indicating the path of the folder location with the array data. |
|
29 |
-#' @param sampleID_path A character indicating the path of the folder location with the sample identifiers matching the array structure. |
|
30 |
-#' @param mig_prefix Optional: A character indicating the identifier of an MIG dilution file |
|
31 |
-#' @param machine Optional:A character indicating the machine used to process the data in the folder |
|
32 |
-#' @param FG Optional:A character indicating the name of the foreground variable name. if not specified its created as \code{paste0("F",channel,".Median")} |
|
33 |
-#' @param BG Optional:A character indicating the name of the background variable name. if not specified its created as \code{paste0("B",channel,".Median")} |
|
34 |
-#' @param FBG Optional:A character indicating the name of the foreground - background variable name. if not specified its created as \code{paste0("F",channel,".Median...B",channel)} |
|
35 |
-#' @param date_process Optional:A character indicating the date when the samples were processed. |
|
36 |
-#' |
|
37 |
-#' @description A generic function returning a list with the data structure. |
|
38 |
-#' @importFrom rlang sym |
|
39 |
-#' @return a list of parameters required to process the data |
|
40 |
-#' @export |
|
41 |
-#' |
|
42 |
-#' @examples |
|
43 |
-#' ## specify the the parameters to process the data |
|
44 |
-#' genepix_vars <- array_vars( |
|
45 |
-#' ## the channel the data was processed in |
|
46 |
-#' channel = "635", |
|
47 |
-#' ## folder where the array data is stored |
|
48 |
-#' chip_path = "data/array_data", |
|
49 |
-#' ## the number of samples per slide or in as single run |
|
50 |
-#' totsamples = 21, |
|
51 |
-#' ## How many blocks each sample occupies |
|
52 |
-#' blockspersample = 2, |
|
53 |
-#' ## folder where the array data samples id files are stored |
|
54 |
-#' sampleID_path = "data/array_sampleID/", |
|
55 |
-#' ## optional |
|
56 |
-#' mig_prefix = "_first", |
|
57 |
-#' machine = 1, |
|
58 |
-#' date_process = "0520" |
|
59 |
-#' ) |
|
60 |
-#' genepix_vars |
|
61 |
-#' @return genepix_vars |
|
62 |
-#' |
|
63 |
-array_vars <- function(channel = "635", |
|
64 |
- totsamples , |
|
65 |
- FG = "", |
|
66 |
- BG = "", |
|
67 |
- FBG = "", |
|
68 |
- blockspersample, |
|
69 |
- chip_path = "data/array_data", |
|
70 |
- sampleID_path = "data/array_sampleID/", |
|
71 |
- mig_prefix = "_first", |
|
72 |
- machine = "", |
|
73 |
- date_process = "") { |
|
74 |
- ####List the directories with the CHIP data############### |
|
75 |
- paths <- list.dirs(path = chip_path, recursive = TRUE) |
|
76 |
- ## remove the parent directory |
|
77 |
- ## the folders with the chip data with the different batches is left |
|
78 |
- paths <- paths[!grepl(paste0(chip_path, "$") , paths)] |
|
79 |
- if (FG == "") { |
|
80 |
- FG <- rlang::sym(paste0("F", channel, ".Median")) |
|
81 |
- } else{ |
|
82 |
- FG <- rlang::sym(FG) |
|
83 |
- } |
|
84 |
- if (BG == "") { |
|
85 |
- BG <- rlang::sym(paste0("B", channel, ".Median")) |
|
86 |
- } else{ |
|
87 |
- BG <- rlang::sym(BG) |
|
88 |
- } |
|
89 |
- if (FBG == "") { |
|
90 |
- FBG <- rlang::sym(paste0("F", channel, ".Median...B", channel)) |
|
91 |
- } else{ |
|
92 |
- FBG <- rlang::sym(FBG) |
|
93 |
- } |
|
94 |
- |
|
95 |
- genepix_vars <- |
|
96 |
- list( |
|
97 |
- FG = FG, |
|
98 |
- #rlang::sym(paste0("F",channel,".Median")), |
|
99 |
- BG = BG, |
|
100 |
- #rlang::sym(paste0("B",channel,".Median")) , |
|
101 |
- FBG = FBG, |
|
102 |
- #rlang::sym(paste0("F",channel,".Median...B",channel)), |
|
103 |
- paths = paths, |
|
104 |
- chip_path = chip_path, |
|
105 |
- sampleID_path = sampleID_path, |
|
106 |
- mig_prefix = mig_prefix, |
|
107 |
- machine = machine, |
|
108 |
- date_process = date_process, |
|
109 |
- totsamples = totsamples, |
|
110 |
- blockspersample = blockspersample, |
|
111 |
- mp = machine, |
|
112 |
- dp = date_process |
|
113 |
- ) |
|
114 |
- return(genepix_vars) |
|
115 |
-} |
|
116 |
- |
|
117 |
- |
|
118 |
- |
|
119 |
- |
|
120 |
- |
|
121 |
-#' Title Create directory function |
|
122 |
-#' |
|
123 |
-#' |
|
124 |
-#' @param path folder location to create a directory |
|
125 |
-#' @description creating a directory |
|
126 |
-#' @return created directory |
|
127 |
-#' @export |
|
128 |
-#' |
|
129 |
-#' @examples |
|
130 |
-#' create_dir("data/sample_folder") |
|
131 |
-create_dir <- function(path) { |
|
132 |
- if (!file.exists(paste0(path))) { |
|
133 |
- dir.create(path) |
|
134 |
- } else |
|
135 |
- warning("The folder", path, " already exists") |
|
136 |
-} |
|
137 |
- |
|
138 |
- |
|
139 |
-###___________________________________________________ |
|
140 |
- |
|
141 |
-#' Object names of a list |
|
142 |
-#' |
|
143 |
-#' @param i - a list filenames with .txt or .gpr extension |
|
144 |
-#' |
|
145 |
-#' @return a list of file names |
|
146 |
-#' @export |
|
147 |
-#' @description A generic function returning a vector with the names of files in the same directory. Removes the file extension |
|
148 |
-#' @examples |
|
149 |
-#' name_of_files("KK2-06.txt") |
|
150 |
-#' @return name |
|
151 |
- |
|
152 |
-name_of_files <- function(i) { |
|
153 |
- name <- gsub("\\.txt*|\\.gpr*", "", i, perl = TRUE) |
|
154 |
- name <- gsub(" repeat", "", name, perl = TRUE) |
|
155 |
- name <- gsub(" ", "_", name, perl = TRUE) |
|
156 |
- return(name) |
|
157 |
-} |
|
158 |
- |
|
159 |
- |
|
160 |
-#___________________________________________________ |
|
161 |
-# Function to be called in case of replicated error |
|
162 |
- |
|
163 |
-#' \\\_Start_Function_For Error\\\ # |
|
164 |
-#' |
|
165 |
-#' @description A generic function to write into the log file with a replicate check error |
|
166 |
-#' @param iden An id for the file with replicates error |
|
167 |
-#' @return a log file showing the replicate errors |
|
168 |
-#' @keywords internal |
|
169 |
-#' |
|
170 |
-error_replicates <- function(iden) { |
|
171 |
- sink("log_replicates.txt" , append = TRUE) |
|
172 |
- warning("The replicates per antigen per sample are more than expected for ", |
|
173 |
- iden) |
|
174 |
- sink() |
|
175 |
-} |
|
176 |
-#' \\\_End_Function_\\\ # |
|
177 |
-#___________________________________________________ |
|
178 |
- |
|
179 |
- |
|
180 |
- |
|
181 |
- |
|
182 |
-### |
|
183 |
-#' Check existing sample ID names |
|
184 |
-#' @param genepix_vars A list of specific definitions of the experiment design. See \code{\link{array_vars}}. |
|
185 |
-#' @description A generic function to check if the file(s) witht the MFI values have a corresponding sample ID file. Sample ID file is |
|
186 |
-#' a file with the identifiers for the samples in array file. |
|
187 |
-#' @return A file with missing corresponding sample ID files |
|
188 |
-#' @importFrom stats median quantile rnorm sd |
|
189 |
-#' @importFrom utils read.csv write.table |
|
190 |
-#' @export |
|
191 |
-#' |
|
192 |
-#' @examples |
|
193 |
-#' genepix_vars <- array_vars( |
|
194 |
-#' channel = "635", |
|
195 |
-#' chip_path = system.file("extdata", "array_data/machine1/", package="protGear"), |
|
196 |
-#' totsamples = 21, |
|
197 |
-#' blockspersample = 2, |
|
198 |
-#' mig_prefix = "_first", |
|
199 |
-#' machine = 1, |
|
200 |
-#' date_process = "0520" |
|
201 |
-#' ) |
|
202 |
-#' check_sampleID_files(genepix_vars) |
|
203 |
-check_sampleID_files <- function(genepix_vars) { |
|
204 |
- ## copy all sample ID with missing CSV file |
|
205 |
- ## |
|
206 |
- sid_files <- |
|
207 |
- gsub(".csv", "", list.files(genepix_vars$sampleID_path)) |
|
208 |
- ## check if all the chip files have an existing sampleID file |
|
209 |
- sid_check <- |
|
210 |
- gsub( |
|
211 |
- ".txt|.gpr", |
|
212 |
- "", |
|
213 |
- list.files( |
|
214 |
- genepix_vars$chip_path , |
|
215 |
- recursive = TRUE, |
|
216 |
- pattern = "*.txt|*.gpr", |
|
217 |
- full.names = FALSE |
|
218 |
- ) |
|
219 |
- ) |
|
220 |
- ## convert all the file names to caps to avoid merge errrors due to case |
|
221 |
- sid_check <- toupper(sub(".*/(.*)", "\\1", sid_check)) |
|
222 |
- |
|
223 |
- ## missing sampleID for any given file |
|
224 |
- miss_id_file <- sid_check[sid_check %ni% toupper(sid_files)] |
|
225 |
- |
|
226 |
- |
|
227 |
- |
|
228 |
- if (length(miss_id_file) > 0) { |
|
229 |
- write.table(miss_id_file, "missing_IDfile.txt") |
|
230 |
- } |
|
231 |
- |
|
232 |
- if (length(miss_id_file) == 0) { |
|
233 |
- warning("All array files have a corresponding sampleID file") |
|
234 |
- return(0) |
|
235 |
- } else{ |
|
236 |
- return(miss_id_file) |
|
237 |
- } |
|
238 |
- |
|
239 |
-} |
240 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "BC189306", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/DESCRIPTION", |
|
4 |
- "project_path": "DESCRIPTION", |
|
5 |
- "type": "dcf", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647626669159.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 17, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "3,9", |
|
16 |
- "scrollLine": "0" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1647580196, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1647580196, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,31 +0,0 @@ |
1 |
-Package: protGear |
|
2 |
-Type: Package |
|
3 |
-Title: Protein Micro Array Data Management and Interactive Visualization |
|
4 |
-Version: 0.99.544 |
|
5 |
-Authors@R: c(person("Kennedy", "Mwai", role = c("cre", "aut"), email = "keniajin@gmail.com"), |
|
6 |
- person(c("James", "Mburu"), role = "aut" , email = "mburuwanja@gmail.com") , |
|
7 |
- person(c("Jacqueline", "Waeni"), role = "ctb" , email = "jacqwaeni@gmail.com")) |
|
8 |
-Description: A generic three-step pre-processing package for protein microarray data. This package contains different data pre-processing procedures to allow comparison of their performance.These steps are background correction, the coefficient of variation (CV) based filtering, batch correction and normalization. |
|
9 |
-License: GPL-3 |
|
10 |
-URL: https://github.com/Keniajin/protGear |
|
11 |
-BugReports: https://github.com/Keniajin/protGear/issues |
|
12 |
-Depends: |
|
13 |
- R (>= 4.2), dplyr (>= 0.8.0) , limma (>= 3.40.2) ,vsn (>= 3.54.0) |
|
14 |
-Imports: |
|
15 |
- magrittr (>= 1.5) , stats (>= 3.6) , ggplot2 (>= 3.3.0) , tidyr (>= 1.1.3) , data.table (>= 1.14.0), |
|
16 |
- ggpubr (>= 0.4.0), gtools (>= 3.8.2) , tibble (>= 3.1.0) , rmarkdown (>= 2.9) , knitr (>= 1.33), |
|
17 |
- utils (>= 3.6), genefilter (>= 1.74.0), readr (>= 2.0.1) , Biobase (>= 2.52.0), |
|
18 |
- plyr (>= 1.8.6) , Kendall (>= 2.2) , shiny (>= 1.0.0) , purrr (>= 0.3.4), plotly (>= 4.9.0) , MASS (>= 7.3) , |
|
19 |
- htmltools (>= 0.4.0) , flexdashboard (>= 0.5.2) , shinydashboard (>= 0.7.1) , kableExtra (>= 1.3.4), |
|
20 |
- GGally (>= 2.1.2) , pheatmap (>= 1.0.12) , grid(>= 4.1.1), |
|
21 |
- styler (>= 1.6.1) , factoextra (>= 1.0.7) ,FactoMineR (>= 2.4) , rlang (>= 0.4.11), |
|
22 |
- remotes (>= 2.4.0) |
|
23 |
-Suggests: |
|
24 |
- gridExtra (>= 2.3), png (>= 0.1-7) , magick (>= 2.7.3) , ggplotify (>= 0.1.0) , scales (>= 1.1.1) , |
|
25 |
- shinythemes (>= 1.2.0) , shinyjs (>= 2.0.0) , shinyWidgets (>= 0.6.2) , |
|
26 |
- shinycssloaders (>= 1.0.0) , shinyalert (>= 3.0.0) , shinyFiles (>= 0.9.1) |
|
27 |
-biocViews: Microarray, OneChannel, Preprocessing , BiomedicalInformatics , Proteomics , BatchEffect, Normalization , Bayesian, Clustering, Regression,SystemsBiology, ImmunoOncology |
|
28 |
-Encoding: UTF-8 |
|
29 |
-LazyData: false |
|
30 |
-RoxygenNote: 7.1.2 |
|
31 |
-VignetteBuilder: knitr |
32 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "DC0B415C", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/githubio_link.R", |
|
4 |
- "project_path": null, |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647626162454.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 16, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "8,0", |
|
16 |
- "scrollLine": "0" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1625213270, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1625213270, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,14 +0,0 @@ |
1 |
-##https://www.r-bloggers.com/2017/08/building-a-website-with-pkgdown-a-short-guide/ |
|
2 |
- |
|
3 |
-require(devtools) |
|
4 |
-#use_readme_rmd() |
|
5 |
-#use_news_md() |
|
6 |
-#use_vignette("protGear") |
|
7 |
-use_github_links(overwrite = TRUE) |
|
8 |
- |
|
9 |
-library(pkgdown) |
|
10 |
-## build own vignette automatically |
|
11 |
-#devtools::build_vignettes() |
|
12 |
-build_site() |
|
13 |
- |
|
14 |
- |
15 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,26 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "EE75FEDD", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/protGear/R/cv_by_sample_estimation.R", |
|
4 |
- "project_path": "R/cv_by_sample_estimation.R", |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647521320986.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 13, |
|
12 |
- "properties": { |
|
13 |
- "source_window_id": "", |
|
14 |
- "Source": "Source", |
|
15 |
- "cursorPosition": "16,46", |
|
16 |
- "scrollLine": "0" |
|
17 |
- }, |
|
18 |
- "folds": "", |
|
19 |
- "lastKnownWriteTime": 1647521330, |
|
20 |
- "encoding": "UTF-8", |
|
21 |
- "collab_server": "", |
|
22 |
- "source_window": "", |
|
23 |
- "last_content_update": 1647521330130, |
|
24 |
- "read_only": false, |
|
25 |
- "read_only_alternatives": [] |
|
26 |
-} |
|
27 | 0 |
\ No newline at end of file |
28 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,44 +0,0 @@ |
1 |
-#' Summarise CV by samples |
|
2 |
-#' @title cv by sample |
|
3 |
-#' |
|
4 |
-#' @param cv_variable A character string containing the identifier of the variable with CV values. |
|
5 |
-#' @param lab_replicates A numeric value indicating the number of lab replicates. |
|
6 |
-#' @param sampleID_var A character string containing the name of the sample identifier variable. Default set to 'sampleID' |
|
7 |
-#' @param dataCV A dataframe |
|
8 |
-#' @import dplyr tidyr |
|
9 |
-#' @importFrom tidyr gather |
|
10 |
-#' @importFrom dplyr lag filter |
|
11 |
-#' @description A function to give the summary of the CV's by the sampleID |
|
12 |
-#' @return A data frame of CV calculated by sample |
|
13 |
-#' @export |
|
14 |
-#' |
|
15 |
-#' @examples |
|
16 |
-#' dataC <- readr::read_csv(system.file("extdata", "dataC.csv", package="protGear")) |
|
17 |
-#' ## this file has 3 lab replicates and the default names |
|
18 |
-#' dataCV <- cv_estimation(dataC ,lab_replicates=3) |
|
19 |
-#' cv_by_sample_estimation(dataCV, cv_variable = "cvCat_all", lab_replicates = 3) |
|
20 |
-cv_by_sample_estimation <- |
|
21 |
- function(dataCV, |
|
22 |
- cv_variable, |
|
23 |
- lab_replicates, |
|
24 |
- sampleID_var = 'sampleID') { |
|
25 |
- ## creating a summary of the CV's by sampleID for each file |
|
26 |
- ## helps in identifying samples with a high CV value |
|
27 |
- if (lab_replicates > 1) { |
|
28 |
- iden <- unique(dataCV$iden) |
|
29 |
- dataC_cvSample <- dataCV %>% |
|
30 |
- group_by_at(c(sampleID_var, cv_variable)) %>% |
|
31 |
- summarise(n = n()) %>% |
|
32 |
- mutate(perc = round((n / sum(n)) * 100, 2)) %>% |
|
33 |
- #rename(cvCat='get(cv_variable)') %>% |
|
34 |
- ungroup() %>% |
|
35 |
- gather(variable, value,-c(sampleID_var, cv_variable)) %>% |
|
36 |
- unite(temp,!!(cv_variable), variable) %>% |
|
37 |
- tidyr::spread(temp, value, fill = 0) |
|
38 |
- } else{ |
|
39 |
- dataC_cvSample <- NULL |
|
40 |
- warning("The experiment is specified not to have lab replicates") |
|
41 |
- } |
|
42 |
- |
|
43 |
- return(dataC_cvSample) |
|
44 |
- } |
45 | 0 |
deleted file mode 100644 |
... | ... |
@@ -1,27 +0,0 @@ |
1 |
-{ |
|
2 |
- "id": "F4C665F5", |
|
3 |
- "path": "C:/Users/kmwai/OneDrive - Kemri Wellcome Trust/H_Drive/PhD_work/projects/protGear_data/examples_functions.R", |
|
4 |
- "project_path": null, |
|
5 |
- "type": "r_source", |
|
6 |
- "hash": "0", |
|
7 |
- "contents": "", |
|
8 |
- "dirty": false, |
|
9 |
- "created": 1647515325438.0, |
|
10 |
- "source_on_save": false, |
|
11 |
- "relative_order": 7, |
|
12 |
- "properties": { |
|
13 |
- "tempName": "Untitled4", |
|
14 |
- "source_window_id": "", |
|
15 |
- "Source": "Source", |
|
16 |
- "cursorPosition": "8,45", |
|
17 |
- "scrollLine": "0" |
|
18 |
- }, |
|
19 |
- "folds": "", |
|
20 |
- "lastKnownWriteTime": 1639483627, |
|
21 |
- "encoding": "UTF-8", |
|
22 |
- "collab_server": "", |
|
23 |
- "source_window": "", |
|
24 |
- "last_content_update": 1639483627, |
|
25 |
- "read_only": false, |
|
26 |
- "read_only_alternatives": [] |
|
27 |
-} |
|
28 | 0 |
\ No newline at end of file |
29 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,97 +0,0 @@ |
1 |
-library(protGear) |
|
2 |
-system.file("extdata", "/array_data/machine1/KK2-06.txt", package="protGear") |
|
3 |
-visualize_slide( |
|
4 |
- infile = system.file("extdata", "/array_data/machine1/KK2-06.txt", package="protGear"), |
|
5 |
- MFI_var = "B635 Median" |
|
6 |
-) |
|
7 |
- |
|
8 |
- |
|
9 |
-## specify the the parameters to process the data |
|
10 |
-genepix_vars <- array_vars( |
|
11 |
- channel = "635", |
|
12 |
- chip_path = file.path(system.file("extdata", "array_data/machine1/", package="protGear")), |
|
13 |
- totsamples = 21, |
|
14 |
- blockspersample = 2, |
|
15 |
- mig_prefix = "_first", |
|
16 |
- machine = 1, |
|
17 |
- ## optional |
|
18 |
- date_process = "0520" |
|
19 |
-) |
|
20 |
- |
|
21 |
- |
|
22 |
- |
|
23 |
- |
|
24 |
-genepix_vars <- array_vars( |
|
25 |
- channel = "635", |
|
26 |
- chip_path = file.path(system.file("extdata", "array_data/machine1/", package="protGear")), |
|
27 |
- totsamples = 21, |
|
28 |
- blockspersample = 2, |
|
29 |
- mig_prefix = "_first", |
|
30 |
- machine = 1, |
|
31 |
- ## optional |
|
32 |
- date_process = "0520" |
|
33 |
-) |
|
34 |
- |
|
35 |
-data_path <- paste0(genepix_vars$chip_path) |
|
36 |
-filenames <- list.files(file.path(genepix_vars$chip_path), |
|
37 |
- pattern = "*.txt$|*.gpr$", full.names = F |
|
38 |
-) |
|
39 |
-data_files <- purrr::map( |
|
40 |
- .x = filenames, |
|
41 |
- .f = read_array_files, |
|
42 |
- data_path = data_path, |
|
43 |
- genepix_vars = genepix_vars |
|
44 |
-) |
|
45 |
-data_files <- set_names(data_files, purrr::map(filenames, name_of_files)) |
|
46 |
-names(data_files) |
|
47 |
-bg <- extract_bg(iden ="KK2-06" , data_files=data_files,genepix_vars=genepix_vars) |
|
48 |
- |
|
49 |
-readr::write_csv(bg %>% select(-slide),"inst/extdata/bg_example.csv") |
|
50 |
-plot_bg(bg, |
|
51 |
- antigen_name = "antigen", |
|
52 |
- bg_MFI = "BG_Median", FG_MFI = "FBG_Median", log = F |
|
53 |
-) |
|
54 |
- |
|
55 |
- |
|
56 |
- |
|
57 |
-### Define the genepix_vars |
|
58 |
-genepix_vars <- array_vars( |
|
59 |
- channel = "635", |
|
60 |
- chip_path = file.path(system.file("extdata", "array_data/machine1/", package="protGear")), |
|
61 |
- totsamples = 21, |
|
62 |
- blockspersample = 2, |
|
63 |
- mig_prefix = "_first", |
|
64 |
- machine = 1, |
|
65 |
- ## optional |
|
66 |
- date_process = "0520" |
|
67 |
-) |
|
68 |
- |
|
69 |
-## the path where the micro-array data is located |
|
70 |
-data_path <- paste0(genepix_vars$chip_path) |
|
71 |
-filenames <- list.files(file.path(genepix_vars$chip_path), |
|
72 |
- pattern = "*.txt$|*.gpr$", full.names = F |
|
73 |
-) |
|
74 |
-## create a list of all the files |
|
75 |
-data_files <- purrr::map( |
|
76 |
- .x = filenames, |
|
77 |
- .f = read_array_files, |
|
78 |
- data_path = data_path, |
|
79 |
- genepix_vars = genepix_vars |
|
80 |
-) |
|
81 |
-data_files <- set_names(data_files, purrr::map(filenames, name_of_files)) |
|
82 |
- |
|
83 |
-merge_sampleID(iden = "KK2-06", data_files = data_files, |
|
84 |
- genepix_vars =genepix_vars,method = "subtract_global" ) |
|
85 |
-readr::write_csv(df_1,"inst/extdata/Data1_sample.csv") |
|
86 |
- |
|
87 |
-readr::write_csv(Data1,"inst/extdata/Data1_bg_sample.csv") |
|
88 |
- |
|
89 |
- |
|
90 |
- |
|
91 |
-## buffers |
|
92 |
- bg_correct_df <- readr::read_csv(system.file("extdata", "Data1_sample.csv", package="protGear")) |
|
93 |
-aa <- buffer_spots(Data1 = bg_correct_df) |
|
94 |
-readr::write_csv(aa %>% mutate(.id="slideid"),"inst/extdata/buffers_sample2.csv") |
|
95 |
- |
|
96 |
-buffers <- readr::read_csv(system.file("extdata", "buffers_sample2.csv", package="protGear")) |
|
97 |
-plot_buffer(df=buffers,buffer_names = "sampleID") |