... | ... |
@@ -21,12 +21,14 @@ export(lm_adjust) |
21 | 21 |
export(make_design) |
22 | 22 |
export(metric_sample_filter) |
23 | 23 |
export(scone) |
24 |
+export(scone_wrapper) |
|
24 | 25 |
export(score_matrix) |
25 | 26 |
import(BiocParallel) |
26 | 27 |
import(gplots) |
27 | 28 |
importFrom(DESeq,estimateSizeFactorsForMatrix) |
28 | 29 |
importFrom(EDASeq,betweenLaneNormalization) |
29 | 30 |
importFrom(MASS,glm.nb) |
31 |
+importFrom(RColorBrewer,brewer.pal) |
|
30 | 32 |
importFrom(RUVSeq,RUVg) |
31 | 33 |
importFrom(aroma.light,normalizeQuantileRank.matrix) |
32 | 34 |
importFrom(boot,inv.logit) |
... | ... |
@@ -44,6 +46,8 @@ importFrom(graphics,hist) |
44 | 46 |
importFrom(graphics,par) |
45 | 47 |
importFrom(graphics,plot) |
46 | 48 |
importFrom(graphics,text) |
49 |
+importFrom(hexbin,hexbin) |
|
50 |
+importFrom(hexbin,plot) |
|
47 | 51 |
importFrom(limma,lmFit) |
48 | 52 |
importFrom(matrixStats,colIQRs) |
49 | 53 |
importFrom(matrixStats,colMedians) |
50 | 54 |
deleted file mode 100644 |
... | ... |
@@ -1,420 +0,0 @@ |
1 |
-library(Biobase) #for AnnotatedDataFrame if scone is not loaded |
|
2 |
-library(scone) |
|
3 |
-library(RColorBrewer) |
|
4 |
-library(boot) #for logit |
|
5 |
- |
|
6 |
-#A wrapper for filtering and running scone |
|
7 |
-#currently supports only one-variable bio and batch |
|
8 |
-scone_something = function(selected_eSet, housekeeping_list, |
|
9 |
- batch, bio = NULL, de = NULL, |
|
10 |
- adjust_bio = "no", adjust_batch = "yes", rezero = TRUE, |
|
11 |
- out_dir = getwd(), stratified_pam = TRUE, |
|
12 |
- selected_expression_in_eSet = "counts_table", |
|
13 |
- results_image_filename = "sconeResults.Rda", |
|
14 |
- plots_filename = "scone_plots.pdf") { |
|
15 |
- |
|
16 |
-printf <- function(...) cat(sprintf(...)) |
|
17 |
-set.seed(112233) ## reproducibility |
|
18 |
-if (!file.exists(out_dir)) { |
|
19 |
- dir.create(out_dir, recursive=TRUE) |
|
20 |
-} |
|
21 |
-pdf(file.path(out_dir, plots_filename), onefile=TRUE) |
|
22 |
- |
|
23 |
-#keep only protein coding genes |
|
24 |
-keepGenes = featureData(selected_eSet)$Transcript_Type == "protein_coding" |
|
25 |
-selected_eSet = selected_eSet[keepGenes,] |
|
26 |
-printf("Removed %d non-protein coding genes, %d protein-coding genes left\n", sum(!keepGenes), sum(keepGenes)) |
|
27 |
- |
|
28 |
- |
|
29 |
-#don't use exprs because we have several units (e.g., counts, tpm, fpkm) for each eSet |
|
30 |
-#instead, dynamically access the assayData with the user's argument |
|
31 |
-selected_unit_exprs = get(selected_expression_in_eSet, envir = assayData(selected_eSet)) |
|
32 |
-selected_unit_exprs[is.na(selected_unit_exprs)] = 0 |
|
33 |
-E = aggregate(selected_unit_exprs, by=list(featureData(selected_eSet)$Gene_Symbol), |
|
34 |
- FUN=max, na.rm=TRUE) |
|
35 |
-rownames(E) = E[,1] |
|
36 |
-E = E[, -1] |
|
37 |
-E = data.matrix(E) |
|
38 |
-E[E == -Inf] = 0 #cases in which the max had no non-NA option - should not happen because we turned NAs into 0's already |
|
39 |
-stopifnot(all(is.finite(E))) |
|
40 |
-printf("collapsed %d transcripts into %d genes by gene symbols\n", nrow(selected_unit_exprs), nrow(E)) |
|
41 |
- |
|
42 |
-#now to remove duplicated features from the featuresData (we know that the same gene |
|
43 |
-#symbol will have the same type, so no harm in just removing duplicate rows) and then |
|
44 |
-#reordering it according to the new expression matrix |
|
45 |
-featureDat = featureData(selected_eSet) |
|
46 |
-featureDat = featureDat[!duplicated(featureDat$Gene_Symbol)] |
|
47 |
-featureDat = featureDat[rownames(E), ] |
|
48 |
- |
|
49 |
-# Expression set object to make filtering easier |
|
50 |
-chosen_units_eSet = new("ExpressionSet", exprs = E, featureData = featureDat, |
|
51 |
- protocolData = protocolData(selected_eSet), phenoData = phenoData(selected_eSet)) |
|
52 |
- |
|
53 |
-# Remove failed samples: |
|
54 |
-is.failed = apply(is.na(exprs(chosen_units_eSet)) | (exprs(chosen_units_eSet) == 0), 2, all) | |
|
55 |
- apply(is.na(pData(protocolData(chosen_units_eSet))), 1, all) |
|
56 |
-chosen_units_eSet = chosen_units_eSet[,!is.failed ] |
|
57 |
-printf("Removed %d failed samples\n", sum(is.failed)) |
|
58 |
- |
|
59 |
- |
|
60 |
-## ----- Pre-Filtering of Transcripts: ---- |
|
61 |
-# Select only coding transcripts and TCR segments |
|
62 |
-is.expressed.sc = rowMeans(exprs(chosen_units_eSet)) > 0 |
|
63 |
-chosen_units_eSet = chosen_units_eSet[is.expressed.sc, ] |
|
64 |
-printf("Removed %d undetected genes, %d left\n",sum(!is.expressed.sc), sum(is.expressed.sc)) |
|
65 |
- |
|
66 |
- |
|
67 |
-# Params for gene filtering |
|
68 |
-# turns out that the filtering of the Th17 dataset was pretty robust to the choice of the quantile |
|
69 |
-thresh_fail = quantile(exprs(chosen_units_eSet)[exprs(chosen_units_eSet) > 0], 0.2) #10 |
|
70 |
-num_fail = 10 |
|
71 |
- |
|
72 |
-# Initial Gene Filtering |
|
73 |
-init.gf.vec = rowSums(exprs(chosen_units_eSet) > thresh_fail) > num_fail |
|
74 |
-chosen_units_eSet = chosen_units_eSet[init.gf.vec, ] |
|
75 |
-printf("Kept only %d genes expressed in more than %.2f units in more than %d cells , excluded %d genes\n", sum(init.gf.vec), thresh_fail, num_fail, sum(!init.gf.vec)) |
|
76 |
- |
|
77 |
- |
|
78 |
- |
|
79 |
- |
|
80 |
- |
|
81 |
- |
|
82 |
-#The counts data frame contains feature-level read counts from tophat alignments of 96 single-cell libraries to the mm10 reference genome [@trapnell2009]. qc contains library alignment metrics obtained from Picard. |
|
83 |
-#de and hk are positive and negative control gene sets derived from population studies. batch and bio are factors labeling batch and time point respectively. Consider the joint distribution of these factors: |
|
84 |
-counts = exprs(chosen_units_eSet) |
|
85 |
-qc = pData(protocolData(chosen_units_eSet)) |
|
86 |
- |
|
87 |
-#prevent casing problems |
|
88 |
-rownames(counts) = sapply(rownames(counts), toupper) |
|
89 |
-housekeeping_genes = sapply(as.vector(as.matrix(read.table(housekeeping_list, stringsAsFactors = FALSE))), toupper) |
|
90 |
-hk = intersect(housekeeping_genes, rownames(counts)) |
|
91 |
- |
|
92 |
- |
|
93 |
- |
|
94 |
-##################################### |
|
95 |
-#start to do real work |
|
96 |
-##################################### |
|
97 |
-unnecessaryQC_metrics = apply(is.na(qc), 2, any) | apply(qc, 2, sd) < 1e-5 |
|
98 |
-printf("remove %d QC metrics (%s) that either contained NAs or were constant across cells\n", sum(unnecessaryQC_metrics), paste(colnames(qc)[unnecessaryQC_metrics], collapse = "; ")) |
|
99 |
-qc = qc[, !unnecessaryQC_metrics] |
|
100 |
- |
|
101 |
-colnames(qc) |
|
102 |
- |
|
103 |
-## Joint distribution of batches and biological conditions (time after induction) |
|
104 |
-if(!is.null(bio)) { |
|
105 |
- table(batch,bio) |
|
106 |
-} |
|
107 |
- |
|
108 |
-# Color scheme |
|
109 |
-cc <- c(brewer.pal(9, "Set1"), brewer.pal(8, "Set2"),brewer.pal(9,"Set3")) |
|
110 |
- |
|
111 |
-# Barplot of read proportion mapping to the genome |
|
112 |
-barplot(qc$RALIGN[order(batch)], col=cc[batch][order(batch)], border=cc[batch][order(batch)], main="Percentage of mapped reads, colored by batch") |
|
113 |
-legend("bottomleft", legend=levels(batch), fill=cc,cex=0.4) |
|
114 |
- |
|
115 |
-# Barplot of total read number |
|
116 |
-barplot(qc$NREADS[order(batch)], col=cc[batch][order(batch)], border=cc[batch][order(batch)], main="Total number of reads, colored by batch") |
|
117 |
-legend("topright", legend=levels(batch), fill=cc, cex=0.4) |
|
118 |
- |
|
119 |
-qpc = prcomp(qc, center = TRUE, scale = TRUE) |
|
120 |
-barplot(cumsum(qpc$sdev^2)/sum(qpc$sdev^2), border="gray", xlab="PC", ylab="proportion of variance", main="Quality PCA") |
|
121 |
- |
|
122 |
-barplot(qpc$x[,1][order(batch)], col=cc[batch][order(batch)], border=cc[batch][order(batch)], main="Quality PC1, colored by batch") |
|
123 |
-legend("bottomleft", legend=levels(batch), fill=cc, cex=0.8) |
|
124 |
- |
|
125 |
- |
|
126 |
-# Mean log10(x+1) expression |
|
127 |
-mu_obs = rowMeans(log10(counts[hk,]+1)) #0 if hk == NULL |
|
128 |
- |
|
129 |
-# Drop-outs |
|
130 |
-drop_outs = counts[hk,] == 0 |
|
131 |
- |
|
132 |
-# Logistic Regression Model of Failure |
|
133 |
-logistic_coef = matrix(0,ncol(drop_outs),2) |
|
134 |
-for (si in seq_len(ncol(drop_outs))){ |
|
135 |
- fit = suppressWarnings(glm(cbind(drop_outs[,si],1 - drop_outs[,si]) ~ mu_obs,family=binomial(logit))) |
|
136 |
- if(fit$converged){ |
|
137 |
- logistic_coef[si,] = fit$coefficients |
|
138 |
- } else { |
|
139 |
- logistic_coef[si,1] = logit(drop_rate[si]) |
|
140 |
- } |
|
141 |
-} |
|
142 |
- |
|
143 |
- |
|
144 |
-par(mfrow=c(1,2)) |
|
145 |
- |
|
146 |
-# Plot Failure Curves and Calculate AUC |
|
147 |
-plot(NULL, main = "False Negative Rate Curves",ylim = c(0,1),xlim = c(0,6), ylab = "Failure Probability", xlab = "Mean log10 Expression") |
|
148 |
-x = (0:60)/10 |
|
149 |
-AUC = NULL |
|
150 |
-for(si in 1:ncol(counts)){ |
|
151 |
- y = 1/(exp(-logistic_coef[si,1] - logistic_coef[si,2] * x) + 1) |
|
152 |
- AUC[si] = sum(y)/10 |
|
153 |
- lines(x, 1/(exp(-logistic_coef[si,1] - logistic_coef[si,2] * x) + 1), type = 'l', lwd = 2, col = cc[batch][si]) |
|
154 |
-} |
|
155 |
- |
|
156 |
-# Barplot of FNR AUC |
|
157 |
-barplot(AUC[order(batch)], col=cc[batch][order(batch)], border=cc[batch][order(batch)], main="FNR AUC, colored by batch") |
|
158 |
-legend("topleft", legend=levels(batch), fill=cc, cex=0.4) |
|
159 |
- |
|
160 |
-mfilt_report <- metric_sample_filter(expr = counts, |
|
161 |
- nreads = qc$NREADS,ralign = qc$RALIGN, |
|
162 |
- suff_nreads = 10^5, |
|
163 |
- suff_ralign = 90, |
|
164 |
- pos_controls = rownames(counts) %in% hk, |
|
165 |
- zcut = 3,mixture = FALSE, plot = TRUE) |
|
166 |
- |
|
167 |
- |
|
168 |
-hist(qc$RALIGN, breaks = 0:100) |
|
169 |
-# Hard threshold |
|
170 |
-abline(v = 15, col = "yellow", lwd = 2) |
|
171 |
-# 3 (zcut) standard deviations below the mean ralign value |
|
172 |
-abline(v = mean(qc$RALIGN) - 3*sd(qc$RALIGN), col = "green", lwd = 2) |
|
173 |
-# 3 (zcut) MADs below the median ralign value |
|
174 |
-abline(v = median(qc$RALIGN) - 3*mad(qc$RALIGN), col = "red", lwd = 2) |
|
175 |
-# Sufficient threshold |
|
176 |
-abline(v = 90, col = "grey", lwd = 2) |
|
177 |
-# Final threshold is the minimum of 1) the sufficient threshold and 2) the max of all others |
|
178 |
-thresh = min(90,max(c(15,mean(qc$RALIGN) - 3*sd(qc$RALIGN),median(qc$RALIGN) - 3*mad(qc$RALIGN)))) |
|
179 |
-abline(v = thresh, col = "blue", lwd = 2, lty = 2) |
|
180 |
- |
|
181 |
-legend("topleft",legend = c("Hard","SD","MAD","Sufficient","Final"),lwd = 2, col = c("yellow","green","red","grey","blue"),lty = c(1,1,1,1,2), cex = .5) |
|
182 |
- |
|
183 |
- |
|
184 |
-# Which thresholds are missing? (breadth) |
|
185 |
-is_na_filt = unlist(lapply(is.na(mfilt_report),any)) |
|
186 |
- |
|
187 |
-# Identify samples failing any threshold |
|
188 |
-m_sampfilter = !apply(simplify2array(mfilt_report[!is_na_filt]),1,any) |
|
189 |
- |
|
190 |
-# Filter Samples |
|
191 |
-fcounts = counts[,m_sampfilter] |
|
192 |
-fqc = qc[m_sampfilter,] |
|
193 |
-fbatch = batch[m_sampfilter] |
|
194 |
-fbio = bio[m_sampfilter] #if bio==NULL then we get from this line fbio=NULL |
|
195 |
- |
|
196 |
-# # Simple gene filter --> redundant with what I have above |
|
197 |
-# filterCount <- function(counts, nRead=5, nCell=5){ |
|
198 |
-# filter <- apply(counts, 1, function(x) length(x[x>=nRead])>=nCell) |
|
199 |
-# return(filter) |
|
200 |
-# } |
|
201 |
-# genefilter <- filterCount(fcounts) |
|
202 |
-# fcounts = fcounts[genefilter,] |
|
203 |
- |
|
204 |
-# skip filtering genes again |
|
205 |
-fcounts = counts |
|
206 |
-fhk = hk[hk %in% rownames(fcounts)] |
|
207 |
-fde = de[de %in% rownames(fcounts)] #if de==NULL then we get from this line fde=NULL |
|
208 |
- |
|
209 |
- |
|
210 |
-#FNR |
|
211 |
-#Michael: I see you recommend bulk_model=true even though the default is false? Why? |
|
212 |
-#I'm not clear on what this parameter does - what other gene features are possible if gFeatM is not specified? |
|
213 |
-fnr_out = estimate_ziber(x = fcounts, bulk_model = TRUE, |
|
214 |
- pos_controls = rownames(fcounts) %in% fhk, |
|
215 |
- verbose = TRUE, maxiter = 1000) |
|
216 |
- |
|
217 |
- |
|
218 |
-SUM_FN = function (ei) |
|
219 |
-{ |
|
220 |
- sums = colSums(ei) |
|
221 |
- eo = t(t(ei)*mean(sums)/sums) |
|
222 |
- return(eo) |
|
223 |
-} |
|
224 |
- |
|
225 |
-MED_FN_POS = function (ei) |
|
226 |
-{ |
|
227 |
- zei = ei |
|
228 |
- zei[ei == 0] = NA |
|
229 |
- q = apply(zei, 2, quantile, 0.5, na.rm = TRUE) |
|
230 |
- zeo = t(t(zei)/q) * mean(q) |
|
231 |
- eo = zeo |
|
232 |
- eo[ei == 0] = 0 |
|
233 |
- return(eo) |
|
234 |
-} |
|
235 |
- |
|
236 |
-EFF_FN = function (ei) |
|
237 |
-{ |
|
238 |
- sums = colSums(ei > 0) |
|
239 |
- eo = t(t(ei)*sums/mean(sums)) |
|
240 |
- return(eo) |
|
241 |
-} |
|
242 |
- |
|
243 |
-imputation = list(none=impute_null,expect=impute_expectation) |
|
244 |
-impute_args = list(p_nodrop = fnr_out$p_nodrop, mu = exp(fnr_out$Alpha[1,])) |
|
245 |
- |
|
246 |
-scaling = list(none=identity, |
|
247 |
- sum = SUM_FN, |
|
248 |
- eff = EFF_FN, |
|
249 |
- tmm = TMM_FN, |
|
250 |
- uq = UQ_FN, |
|
251 |
- uqp = UQ_FN_POS, |
|
252 |
- fq = FQT_FN, |
|
253 |
- fqp = FQ_FN_POS, |
|
254 |
- deseq=DESEQ_FN, |
|
255 |
- deseqp=DESEQ_FN_POS) |
|
256 |
- |
|
257 |
- |
|
258 |
- |
|
259 |
-# Generate Params (RUN = FALSE) |
|
260 |
-#Michael: in your example qc=ppq which is ppq = scale(qc,center = TRUE,scale = TRUE) |
|
261 |
-#but I see in scone's code that it expects QC matrix and does the prcomp itself |
|
262 |
-params <- scone(expr = as.matrix(fcounts), scaling = scaling, |
|
263 |
- imputation = imputation, impute_args = impute_args, |
|
264 |
- ruv_negcon = fhk, k_ruv = 3, |
|
265 |
- qc = as.matrix(fqc), k_qc = 3, |
|
266 |
- bio = fbio, adjust_bio = adjust_bio, |
|
267 |
- batch = fbatch, adjust_batch = adjust_batch, |
|
268 |
- run = FALSE) |
|
269 |
-head(params) |
|
270 |
- |
|
271 |
-apply(params,2,unique) |
|
272 |
- |
|
273 |
-#exclide batch adjustment and no adjustment by bio and similar not-making-sense scenarios |
|
274 |
-is_screened = ((params$imputation_method == "none") & (params$scaling_method %in% c("deseq","uq","tmm"))) | |
|
275 |
- ((params$imputation_method == "expect") & (params$scaling_method %in% c("deseqp","uqp","fqp","eff"))) | |
|
276 |
- ((params$adjust_biology == "bio") & (params$adjust_batch != "batch")) |
|
277 |
- |
|
278 |
-params = params[!is_screened,] |
|
279 |
- |
|
280 |
-##NO MORE PLOTS! |
|
281 |
-dev.off() |
|
282 |
- |
|
283 |
-# Generate Scores and Ranking |
|
284 |
-tic = proc.time() |
|
285 |
-res = scone(expr = as.matrix(fcounts), scaling = scaling, |
|
286 |
- imputation = imputation, impute_args = impute_args, |
|
287 |
- ruv_negcon = fhk, k_ruv = 3, |
|
288 |
- qc = as.matrix(fqc), k_qc = 3, |
|
289 |
- bio = fbio, adjust_bio = adjust_bio, |
|
290 |
- batch = fbatch, adjust_batch = adjust_batch, |
|
291 |
- run = TRUE, params = params, verbose = TRUE, |
|
292 |
- eval_poscon = fde, eval_kclust = 2:3, stratified_pam = stratified_pam, |
|
293 |
- rezero = rezero, return_norm = "in_memory") |
|
294 |
-toc = proc.time() |
|
295 |
- |
|
296 |
-printf("time elapsed:\n") |
|
297 |
-print(toc-tic) |
|
298 |
- |
|
299 |
-save.image(file.path(out_dir, results_image_filename)) |
|
300 |
- |
|
301 |
- |
|
302 |
-printf("Scone finished successfully and image saved") |
|
303 |
- |
|
304 |
-} |
|
305 |
- |
|
306 |
- |
|
307 |
-#code to run this method and test it |
|
308 |
-if(FALSE) { |
|
309 |
- rm(list=ls()) |
|
310 |
- #library(bioc2016singlecell) |
|
311 |
- set.seed(112233) ## for reproducibility |
|
312 |
- printf <- function(...) cat(sprintf(...)) |
|
313 |
- |
|
314 |
- library(Biobase) #for AnnotatedDataFrame if scone is not loaded |
|
315 |
- library(scone) |
|
316 |
- library(RColorBrewer) |
|
317 |
- |
|
318 |
- BiocParallel::register(BiocParallel::SerialParam()) |
|
319 |
- |
|
320 |
- setwd("/data/yosef2/Published_Data/TH17/code") |
|
321 |
- source("scone_something.R") |
|
322 |
- |
|
323 |
- load("/data/yosef2/Published_Data/TH17/code/combined_th17_metadata.Rdata") |
|
324 |
- housekeeping_list = "/data/yosef/CD8_effector_diff/src/SummaryPipeline/house_keeping_mouse_TitleCase.txt" |
|
325 |
- out_dir = "/data/yosef2/Published_Data/TH17processed_aw20160712/scone/only_T16" |
|
326 |
- |
|
327 |
- #only T16 from the non-GFP mouse |
|
328 |
- keep = combinedMetadata$michael_condition == "TGFB1_IL6-48h" & |
|
329 |
- !combinedMetadata$is_population |
|
330 |
- |
|
331 |
- combinedMetadata = combinedMetadata[keep,] |
|
332 |
- |
|
333 |
- #tophaht + featureCounts |
|
334 |
- collect_dir="/data/yosef2/Published_Data/TH17/processed_aw20160712/collect" |
|
335 |
- load(file.path(collect_dir, "collectedRNASeqStudy.RData")) |
|
336 |
- selected_eSet = collectedRNASeqStudy$cuff_eSet |
|
337 |
- #featureNames(selected_eSet) = toupper(featureNames(selected_eSet)) |
|
338 |
- |
|
339 |
- selected_eSet = selected_eSet[, combinedMetadata$SRR] |
|
340 |
- batch = combinedMetadata$michael_batch |
|
341 |
- scone_something(selected_eSet, housekeeping_list = housekeeping_list, |
|
342 |
- selected_expression_in_eSet = "counts_table", |
|
343 |
- batch = batch, bio = NULL, de = NULL, |
|
344 |
- adjust_bio = "no", adjust_batch = "yes", rezero = TRUE, |
|
345 |
- out_dir = getwd(), stratified_pam = TRUE, |
|
346 |
- results_image_filename = "sconeResults_tophatFeatureCounts.Rda", |
|
347 |
- plots_filename = "scone_plot_tophatFeatureCounts.pdf") |
|
348 |
- |
|
349 |
- stop("finished") |
|
350 |
- |
|
351 |
-} |
|
352 |
- |
|
353 |
- |
|
354 |
-#code to inspect scone results |
|
355 |
-if(FALSE) { |
|
356 |
- rm(list=ls()) |
|
357 |
- set.seed(4455) |
|
358 |
- out_dir = "/home/eecs/allonwag/data2/TFH/processed_20160628/scone" |
|
359 |
- load(file.path(out_dir, "sconeResults.Rda")) |
|
360 |
- |
|
361 |
- |
|
362 |
- names(res) |
|
363 |
- |
|
364 |
- if(is.null(bio)) { |
|
365 |
- #no positive control genes (de) so no evaluation of positive controls |
|
366 |
- BIO_DEPENDENT_EVALUATION_METRICS = c("BIO_SIL", "EXP_WV_COR") |
|
367 |
- res$scores = res$scores[, !(colnames(res$scores) %in% BIO_DEPENDENT_EVALUATION_METRICS)] |
|
368 |
- res$metrics = res$metrics[, !(colnames(res$metrics) %in% BIO_DEPENDENT_EVALUATION_METRICS)] |
|
369 |
- } |
|
370 |
- |
|
371 |
- head(res$scores) |
|
372 |
- |
|
373 |
- pc_obj = prcomp(res$scores[,-ncol(res$scores)],center = TRUE,scale = FALSE) |
|
374 |
- bp_obj = biplot_colored(pc_obj,y = -res$scores[,ncol(res$scores)],expand = .6) |
|
375 |
- |
|
376 |
- #library(scone) |
|
377 |
- #if I don't have this library installed |
|
378 |
- source("biplot.R") |
|
379 |
- bp_obj = biplot_colored(pc_obj,y = -res$scores[,ncol(res$scores)],expand = .6) |
|
380 |
- |
|
381 |
- points(bp_obj[grepl(",bio,batch",rownames(bp_obj)),], pch = 1, col = "red", cex = 1) |
|
382 |
- points(bp_obj[grepl(",bio,batch",rownames(bp_obj)),], pch = 1, col = "red", cex = 1.5) |
|
383 |
- |
|
384 |
- |
|
385 |
- bp_obj = biplot_colored(pc_obj,y = -res$scores[,ncol(res$scores)],expand = .6) |
|
386 |
- |
|
387 |
- points(bp_obj[grepl(",no_bio,batch",rownames(bp_obj)),], pch = 1, col = "red", cex = 1) |
|
388 |
- points(bp_obj[grepl(",no_bio,batch",rownames(bp_obj)),], pch = 1, col = "red", cex = 1.5) |
|
389 |
- |
|
390 |
- #my addition to scone's tutorial |
|
391 |
- bp_obj = biplot_colored(pc_obj,y = -res$scores[,ncol(res$scores)],expand = .6) |
|
392 |
- |
|
393 |
- points(bp_obj[grepl(",fq,",rownames(bp_obj)),], pch = 1, col = "green", cex = 1.5) |
|
394 |
- points(bp_obj[grepl(",tmm,",rownames(bp_obj)),], pch = 1, col = "cyan", cex = 1.5) |
|
395 |
- points(bp_obj[grepl(",deseq,",rownames(bp_obj)),], pch = 1, col = "magenta", cex = 1.5) |
|
396 |
- points(bp_obj[grepl(",uqp,",rownames(bp_obj)),], pch = 1, col = "black", cex = 1.5) |
|
397 |
- #end my addition |
|
398 |
- |
|
399 |
- bp_obj = biplot_colored(pc_obj,y = -res$scores[,ncol(res$scores)],expand = .6) |
|
400 |
- |
|
401 |
- points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1) |
|
402 |
- points(t(bp_obj[1,]), pch = 1, col = "red", cex = 1.5) |
|
403 |
- |
|
404 |
- points(t(bp_obj[rownames(bp_obj) == rownames(params)[1],]), pch = 1, col = "blue", cex = 1) |
|
405 |
- points(t(bp_obj[rownames(bp_obj) == rownames(params)[1],]), pch = 1, col = "blue", cex = 1.5) |
|
406 |
- |
|
407 |
- arrows(bp_obj[rownames(bp_obj) == rownames(params)[1],][1], |
|
408 |
- bp_obj[rownames(bp_obj) == rownames(params)[1],][2], |
|
409 |
- bp_obj[1,][1], |
|
410 |
- bp_obj[1,][2], |
|
411 |
- lty = 2, lwd = 2) |
|
412 |
- |
|
413 |
- printf("(Trumpets): the selected method is %s", rownames(res$scores)[1]) |
|
414 |
- normalized_expression = res$normalized_data[[1]] |
|
415 |
- selected_normalization_method = names(res$normalized_data)[1] |
|
416 |
- save(normalized_expression, selected_normalization_method, file=file.path(out_dir, "sconeNormalizedExpression.Rda")) |
|
417 |
- |
|
418 |
- sessionInfo() |
|
419 |
- |
|
420 |
-} |
421 | 0 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,236 @@ |
1 |
+#' Wrapper for Running Essential SCONE Modules |
|
2 |
+#' |
|
3 |
+#' @param expr matrix. The expression data matrix (genes in rows, cells in columns). |
|
4 |
+#' @param qc matrix. The quality control (QC) matrix (cells in rows, metrics in columns) |
|
5 |
+#' to be used for filtering, normalization, and evaluation. |
|
6 |
+#' @param bio factor. The biological condition to be modeled in the Adjustment Step |
|
7 |
+#' as variation to be preserved. If adjust_bio="no", it will not be used |
|
8 |
+#' for normalization, but only for evaluation. |
|
9 |
+#' @param batch factor. The known batch variable to be included in the |
|
10 |
+#' adjustment model as variation to be removed. If adjust_batch="no", it will |
|
11 |
+#' not be used for normalization, but only for evaluation. |
|
12 |
+#' @param negcon character. The genes to be used as negative controls for filtering, |
|
13 |
+#' normalization, and evaluation. These genes should be expressed uniformily across |
|
14 |
+#' the biological phenomenon of interest. |
|
15 |
+#' Default NULL. |
|
16 |
+#' @param verbose character. Verbosity level. |
|
17 |
+#' Default "0". |
|
18 |
+#' @param out_dir character. Output directory. |
|
19 |
+#' Default getwd(). |
|
20 |
+#' @param seed numeric. Random seed. |
|
21 |
+#' Default 112233. |
|
22 |
+#' |
|
23 |
+#' @param filt_genes logical. Should genes be filtered post-sample filtering? |
|
24 |
+#' Default TRUE. |
|
25 |
+#' |
|
26 |
+#' @param fnr_maxiter numeric. Maximum number of iterations in EM estimation of expression posteriors. |
|
27 |
+#' Dafault 1000. |
|
28 |
+#' |
|
29 |
+#' @param norm_impute character. Should imputation be included in the comparison? |
|
30 |
+#' If 'force', only imputed normalizations will be run. |
|
31 |
+#' Default "yes." |
|
32 |
+#' @param norm_scaling character. Scaling options to be included in the Scaling Step. |
|
33 |
+#' Default c("sum", "deseq", "tmm", "uq", "fq"). See details. |
|
34 |
+#' @param norm_rezero logical. Restore imputed zeroes to zero following the Scaling Step? |
|
35 |
+#' Default TRUE. |
|
36 |
+#' @param norm_k_max numeric. Maximum number (norm_k_max) of factors of unwanted variation modeled |
|
37 |
+#' in the Adjustment Step. |
|
38 |
+#' Default NULL. |
|
39 |
+#' @param norm_qc_expl numeric. In automatic selection of norm_k_max, what |
|
40 |
+#' fraction of variation must be explained by the first norm_k_max PCs of qc? |
|
41 |
+#' Default 0.5. Ignored if norm_k_max is not NULL. |
|
42 |
+#' @param norm_adjust_bio character. If 'no' it will not be included in the model; if |
|
43 |
+#' 'yes', both models with and without 'bio' will be run; if 'force', only |
|
44 |
+#' models with 'bio' will be run. |
|
45 |
+#' Default "yes." |
|
46 |
+#' @param norm_adjust_batch character. If 'no' it will not be modeled in the Adjustment Step; |
|
47 |
+#' if 'yes', both models with and without 'batch' will be run; if 'force', |
|
48 |
+#' only models with 'batch' will be run. |
|
49 |
+#' Default "yes." |
|
50 |
+#' |
|
51 |
+#' @param eval_dim numeric. The number of principal components to use for evaluation. |
|
52 |
+#' Default NULL. |
|
53 |
+#' @param eval_expr_expl numeric. In automatic selection of eval_dim, what |
|
54 |
+#' fraction of variation must be explained by the first eval_dim PCs of expr? |
|
55 |
+#' Default 0.2. Ignored if eval_dim is not NULL. |
|
56 |
+#' @param eval_poscon character. The genes to be used as positive controls for |
|
57 |
+#' evaluation. These genes should be expected to change according to the |
|
58 |
+#' biological phenomenon of interest. |
|
59 |
+#' @param eval_kclust numeric. The number of clusters (> 1) to be used for pam |
|
60 |
+#' tightness evaluation. If an array of integers, largest average silhouette |
|
61 |
+#' width (tightness) will be reported. If NULL, tightness will be returned NA. |
|
62 |
+#' @param eval_stratified_pam logical. If TRUE then maximum ASW for PAM_SIL is |
|
63 |
+#' separately computed for each biological-cross-batch condition (accepting |
|
64 |
+#' NAs), and a weighted average is returned as PAM_SIL. |
|
65 |
+#' Default TRUE. |
|
66 |
+#' |
|
67 |
+#' @export |
|
68 |
+#' @importFrom RColorBrewer brewer.pal |
|
69 |
+#' @importFrom boot inv.logit |
|
70 |
+#' @importFrom hexbin plot hexbin |
|
71 |
+#' |
|
72 |
+#' @details "ADD DESCRIPTION" |
|
73 |
+#' |
|
74 |
+#' @return Directory structure "ADD DESCRIPTION" |
|
75 |
+#' |
|
76 |
+ |
|
77 |
+scone_wrapper <- function(expr, qc, |
|
78 |
+ bio=NULL, batch=NULL, negcon=NULL, |
|
79 |
+ verbose=c("0","1","2"), out_dir=getwd(), seed = 112233, |
|
80 |
+ |
|
81 |
+ filt_genes=TRUE, |
|
82 |
+ |
|
83 |
+ fnr_maxiter=1000, |
|
84 |
+ |
|
85 |
+ norm_impute=c("yes", "no", "force"), |
|
86 |
+ norm_scaling=c("sum", "deseq", "tmm", "uq", "fq"), |
|
87 |
+ norm_rezero=TRUE, norm_k_max=NULL, norm_qc_expl=0.5, |
|
88 |
+ norm_adjust_bio=c("yes", "no", "force"), |
|
89 |
+ norm_adjust_batch=c("yes", "no", "force"), |
|
90 |
+ |
|
91 |
+ eval_dim=NULL, eval_expr_expl=0.2, |
|
92 |
+ eval_poscon=NULL, eval_kclust = 2:10, eval_stratified_pam = TRUE) { |
|
93 |
+ |
|
94 |
+ verbose = match.arg(verbose) |
|
95 |
+ verbose = as.numeric(verbose) |
|
96 |
+ |
|
97 |
+ printf <- function(...) cat(sprintf(...)) |
|
98 |
+ set.seed(seed) |
|
99 |
+ |
|
100 |
+ if(!is.matrix(expr)){stop("expr must be a matrix")} |
|
101 |
+ |
|
102 |
+ # Primary Directory |
|
103 |
+ if (!file.exists(out_dir)) { |
|
104 |
+ dir.create(out_dir, recursive=TRUE) |
|
105 |
+ } |
|
106 |
+ |
|
107 |
+ # Misc Directory |
|
108 |
+ misc_dir = paste0(out_dir,"/misc") |
|
109 |
+ if (!file.exists(misc_dir)) { |
|
110 |
+ dir.create(misc_dir) |
|
111 |
+ } |
|
112 |
+ |
|
113 |
+ ## ------ Data Filtering Module ------ |
|
114 |
+ |
|
115 |
+ # Initial Gene Filtering: Select "common" transcripts. |
|
116 |
+ thresh_fail = quantile(expr[expr > 0], 0.2) ###Make argument### |
|
117 |
+ num_fail = 10 ###Make argument### |
|
118 |
+ init.gf.vec = rowSums(expr > thresh_fail) > num_fail |
|
119 |
+ if(verbose > 0){printf("Data Filtering Module: Initial filter - Kept only %d genes expressed in more than %.2f units in more than %d cells, excluded %d genes\n", sum(init.gf.vec), thresh_fail, num_fail, sum(!init.gf.vec))} |
|
120 |
+ |
|
121 |
+ # Initial-Filtering Negative Controls |
|
122 |
+ small_negcon = intersect(negcon,rownames(expr)[init.gf.vec]) |
|
123 |
+ if(verbose > 0){printf("Data Filtering Module: %d negative control genes to be used for sample filtering\n", length(small_negcon))} |
|
124 |
+ |
|
125 |
+ # Metric-based Filtering and Report |
|
126 |
+ pdf(paste0(misc_dir,"/filter_report.pdf")) |
|
127 |
+ mfilt = metric_sample_filter(expr, mixture = TRUE, plot = TRUE, hist_breaks = 100, |
|
128 |
+ zcut = 4, ###Make argument### |
|
129 |
+ pos_controls = rownames(ct) %in% small_negcon, |
|
130 |
+ gene_filter = init.gf.vec, |
|
131 |
+ nreads = qc$NREADS,ralign = qc$RALIGN) ###Make argument### |
|
132 |
+ dev.off() |
|
133 |
+ mfilt = !apply(simplify2array(mfilt[!is.na(mfilt)]),1,any) |
|
134 |
+ if(verbose > 0){printf("Data Filtering Module: %d samples to be used in downstream analysis, excluded %d samples\n", sum(mfilt), sum(!mfilt))} |
|
135 |
+ |
|
136 |
+ # Implement sample filter |
|
137 |
+ expr = expr[,mfilt] |
|
138 |
+ qc = qc[mfilt,] |
|
139 |
+ batch = droplevels(batch[mfilt]) |
|
140 |
+ bio = droplevels(bio[mfilt]) |
|
141 |
+ |
|
142 |
+ if(filt_genes){ |
|
143 |
+ thresh_fail = quantile(expr[expr > 0], 0.2) ###Make argument### |
|
144 |
+ num_fail = 10 ###Make argument### |
|
145 |
+ final.gf.vec = rowSums(expr > thresh_fail) > num_fail |
|
146 |
+ if(verbose > 0){printf("Data Filtering Module: Kept only %d genes expressed in more than %.2f units in more than %d cells, excluded %d genes\n", sum(final.gf.vec), thresh_fail, num_fail, sum(!final.gf.vec))} |
|
147 |
+ |
|
148 |
+ # Implement gene filter |
|
149 |
+ expr = expr[final.gf.vec,] |
|
150 |
+ if(!is.null(negcon)){ |
|
151 |
+ negcon = negcon[negcon %in% rownames(expr)] |
|
152 |
+ if(verbose > 0){printf("Data Filtering Module: Kept only %d negative control genes\n", length(negcon))} |
|
153 |
+ } |
|
154 |
+ if(!is.null(eval_poscon)){ |
|
155 |
+ eval_poscon = eval_poscon[eval_poscon %in% rownames(expr)] |
|
156 |
+ if(verbose > 0){printf("Data Filtering Module: Kept only %d positive control genes\n", length(eval_poscon))} |
|
157 |
+ } |
|
158 |
+ } |
|
159 |
+ |
|
160 |
+ if(verbose > 0){printf("Data Filtering Module: COMPLETE\n\n")} |
|
161 |
+ ## ------ False-Negative Rate Inference Module ------ |
|
162 |
+ |
|
163 |
+ if(verbose > 0){printf("False-Negative Rate Inference Module: Estimating posterior expression probability...\n")} |
|
164 |
+ fnr_out = estimate_ziber(x = expr, |
|
165 |
+ bulk_model = TRUE, ###Make argument### |
|
166 |
+ pos_controls = rownames(expr) %in% hk, |
|
167 |
+ verbose = (verbose > 1), |
|
168 |
+ maxiter = fnr_maxiter) |
|
169 |
+ if(fnr_out$convergence == 1){printf("False-Negative Rate Inference Module: WARNING - EM did not converge. Try increasing fnr_max_iter\n")} |
|
170 |
+ if(verbose > 0){printf("False-Negative Rate Inference Module: Producing report...\n")} |
|
171 |
+ |
|
172 |
+ # FNR Report |
|
173 |
+ cc <- c(brewer.pal(9, "Set1"), brewer.pal(8, "Set2"),brewer.pal(9,"Set3")) |
|
174 |
+ |
|
175 |
+ logistic_coef = simple_FNR_params(expr = expr, pos_controls = rownames(expr) %in% negcon) # "Non-DE" are expected to be expressed in all cells |
|
176 |
+ |
|
177 |
+ pdf(paste0(misc_dir,"/fnr_report.pdf"),width = 12,height = 6) |
|
178 |
+ par(mfrow=c(1,2)) |
|
179 |
+ |
|
180 |
+ plot(NULL, main = "False Negative Rate Curves",ylim = c(0,1),xlim = c(0,15), ylab = "Failure Probability", xlab = "Median log-Expression") |
|
181 |
+ x = (0:150)/10 |
|
182 |
+ AUC = NULL |
|
183 |
+ for(si in 1:ncol(expr)){ |
|
184 |
+ y = inv.logit(logistic_coef[si,1] + logistic_coef[si,2] * x) |
|
185 |
+ AUC[si] = sum(y)/10 |
|
186 |
+ if(!is.null(batch)){ |
|
187 |
+ colchoice = cc[batch][si] |
|
188 |
+ }else{ |
|
189 |
+ colchoice = "black" |
|
190 |
+ } |
|
191 |
+ lines(x, y, type = 'l', lwd = 2, col = colchoice) |
|
192 |
+ } |
|
193 |
+ |
|
194 |
+ # Barplot of FNR AUC |
|
195 |
+ if(!is.null(batch)){ |
|
196 |
+ oAUC = order(AUC) |
|
197 |
+ sAUC = AUC[oAUC] |
|
198 |
+ sbatch = batch[oAUC] |
|
199 |
+ barplot(sAUC[order(sbatch)], col=cc[sbatch][order(sbatch)], border=cc[sbatch][order(sbatch)], main="FNR AUC, colored by batch", ylim = c(0,2*max(AUC))) |
|
200 |
+ legend("topleft", legend=levels(sbatch), fill=cc, cex=0.4) |
|
201 |
+ }else{ |
|
202 |
+ barplot(sort(AUC), col="black", border="black", main="FNR AUC",ylim = c(0,2*max(AUC))) |
|
203 |
+ } |
|
204 |
+ |
|
205 |
+ hexbin::plot(hexbin(rowMeans(expr > 0)[!is.na(fnr_out$Beta)],inv.logit(fnr_out$Beta[!is.na(fnr_out$Beta)])), xlab = "Detection Rate", ylab = "Expression Rate") |
|
206 |
+ hexbin::plot(hexbin(fnr_out$Alpha[1,][!is.na(fnr_out$Beta)],inv.logit(fnr_out$Beta[!is.na(fnr_out$Beta)])), ylab = "Expression Rate", xlab = "Median log-Expression") |
|
207 |
+ |
|
208 |
+ if(verbose > 0){printf("False-Negative Rate Inference Module: COMPLETE\n\n")} |
|
209 |
+ |
|
210 |
+ dev.off() |
|
211 |
+ |
|
212 |
+ ui <- fluidPage( |
|
213 |
+ h1("Example app"), |
|
214 |
+ sidebarLayout( |
|
215 |
+ sidebarPanel( |
|
216 |
+ actionButton("qtBtn", "Quit") |
|
217 |
+ ), |
|
218 |
+ mainPanel( |
|
219 |
+ ) |
|
220 |
+ ) |
|
221 |
+ ) |
|
222 |
+ |
|
223 |
+ server <- function(input, output, session) { |
|
224 |
+ |
|
225 |
+ observeEvent(input$qtBtn,{ |
|
226 |
+ if(input$qtBtn > 0){ |
|
227 |
+ stopApp("hello world") |
|
228 |
+ } |
|
229 |
+ }) |
|
230 |
+ } |
|
231 |
+ |
|
232 |
+ print(runApp(list(ui = ui, server = server))) |
|
233 |
+ |
|
234 |
+ sessionInfo() |
|
235 |
+ |
|
236 |
+} |
|
0 | 237 |
\ No newline at end of file |
1 | 238 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,107 @@ |
1 |
+% Generated by roxygen2: do not edit by hand |
|
2 |
+% Please edit documentation in R/scone_wrap.R |
|
3 |
+\name{scone_wrapper} |
|
4 |
+\alias{scone_wrapper} |
|
5 |
+\title{Wrapper for Running Essential SCONE Modules} |
|
6 |
+\usage{ |
|
7 |
+scone_wrapper(expr, qc, bio = NULL, batch = NULL, negcon = NULL, |
|
8 |
+ verbose = c("0", "1", "2"), out_dir = getwd(), seed = 112233, |
|
9 |
+ filt_genes = TRUE, fnr_maxiter = 1000, norm_impute = c("yes", "no", |
|
10 |
+ "force"), norm_scaling = c("sum", "deseq", "tmm", "uq", "fq"), |
|
11 |
+ norm_rezero = TRUE, norm_k_max = NULL, norm_qc_expl = 0.5, |
|
12 |
+ norm_adjust_bio = c("yes", "no", "force"), norm_adjust_batch = c("yes", |
|
13 |
+ "no", "force"), eval_dim = NULL, eval_expr_expl = 0.2, |
|
14 |
+ eval_poscon = NULL, eval_kclust = 2:10, eval_stratified_pam = TRUE) |
|
15 |
+} |
|
16 |
+\arguments{ |
|
17 |
+\item{expr}{matrix. The expression data matrix (genes in rows, cells in columns).} |
|
18 |
+ |
|
19 |
+\item{qc}{matrix. The quality control (QC) matrix (cells in rows, metrics in columns) |
|
20 |
+to be used for filtering, normalization, and evaluation.} |
|
21 |
+ |
|
22 |
+\item{bio}{factor. The biological condition to be modeled in the Adjustment Step |
|
23 |
+as variation to be preserved. If adjust_bio="no", it will not be used |
|
24 |
+for normalization, but only for evaluation.} |
|
25 |
+ |
|
26 |
+\item{batch}{factor. The known batch variable to be included in the |
|
27 |
+adjustment model as variation to be removed. If adjust_batch="no", it will |
|
28 |
+not be used for normalization, but only for evaluation.} |
|
29 |
+ |
|
30 |
+\item{negcon}{character. The genes to be used as negative controls for filtering, |
|
31 |
+normalization, and evaluation. These genes should be expressed uniformily across |
|
32 |
+the biological phenomenon of interest. |
|
33 |
+Default NULL.} |
|
34 |
+ |
|
35 |
+\item{verbose}{character. Verbosity level. |
|
36 |
+Default "0".} |
|
37 |
+ |
|
38 |
+\item{out_dir}{character. Output directory. |
|
39 |
+Default getwd().} |
|
40 |
+ |
|
41 |
+\item{seed}{numeric. Random seed. |
|
42 |
+Default 112233.} |
|
43 |
+ |
|
44 |
+\item{filt_genes}{logical. Should genes be filtered post-sample filtering? |
|
45 |
+Default TRUE.} |
|
46 |
+ |
|
47 |
+\item{fnr_maxiter}{numeric. Maximum number of iterations in EM estimation of expression posteriors. |
|
48 |
+Dafault 1000.} |
|
49 |
+ |
|
50 |
+\item{norm_impute}{character. Should imputation be included in the comparison? |
|
51 |
+If 'force', only imputed normalizations will be run. |
|
52 |
+Default "yes."} |
|
53 |
+ |
|
54 |
+\item{norm_scaling}{character. Scaling options to be included in the Scaling Step. |
|
55 |
+Default c("sum", "deseq", "tmm", "uq", "fq"). See details.} |
|
56 |
+ |
|
57 |
+\item{norm_rezero}{logical. Restore imputed zeroes to zero following the Scaling Step? |
|
58 |
+Default TRUE.} |
|
59 |
+ |
|
60 |
+\item{norm_k_max}{numeric. Maximum number (norm_k_max) of factors of unwanted variation modeled |
|
61 |
+in the Adjustment Step. |
|
62 |
+Default NULL.} |
|
63 |
+ |
|
64 |
+\item{norm_qc_expl}{numeric. In automatic selection of norm_k_max, what |
|
65 |
+fraction of variation must be explained by the first norm_k_max PCs of qc? |
|
66 |
+Default 0.5. Ignored if norm_k_max is not NULL.} |
|
67 |
+ |
|
68 |
+\item{norm_adjust_bio}{character. If 'no' it will not be included in the model; if |
|
69 |
+'yes', both models with and without 'bio' will be run; if 'force', only |
|
70 |
+models with 'bio' will be run. |
|
71 |
+Default "yes."} |
|
72 |
+ |
|
73 |
+\item{norm_adjust_batch}{character. If 'no' it will not be modeled in the Adjustment Step; |
|
74 |
+if 'yes', both models with and without 'batch' will be run; if 'force', |
|
75 |
+only models with 'batch' will be run. |
|
76 |
+Default "yes."} |
|
77 |
+ |
|
78 |
+\item{eval_dim}{numeric. The number of principal components to use for evaluation. |
|
79 |
+Default NULL.} |
|
80 |
+ |
|
81 |
+\item{eval_expr_expl}{numeric. In automatic selection of eval_dim, what |
|
82 |
+fraction of variation must be explained by the first eval_dim PCs of expr? |
|
83 |
+Default 0.2. Ignored if eval_dim is not NULL.} |
|
84 |
+ |
|
85 |
+\item{eval_poscon}{character. The genes to be used as positive controls for |
|
86 |
+evaluation. These genes should be expected to change according to the |
|
87 |
+biological phenomenon of interest.} |
|
88 |
+ |
|
89 |
+\item{eval_kclust}{numeric. The number of clusters (> 1) to be used for pam |
|
90 |
+tightness evaluation. If an array of integers, largest average silhouette |
|
91 |
+width (tightness) will be reported. If NULL, tightness will be returned NA.} |
|
92 |
+ |
|
93 |
+\item{eval_stratified_pam}{logical. If TRUE then maximum ASW for PAM_SIL is |
|
94 |
+separately computed for each biological-cross-batch condition (accepting |
|
95 |
+NAs), and a weighted average is returned as PAM_SIL. |
|
96 |
+Default TRUE.} |
|
97 |
+} |
|
98 |
+\value{ |
|
99 |
+Directory structure "ADD DESCRIPTION" |
|
100 |
+} |
|
101 |
+\description{ |
|
102 |
+Wrapper for Running Essential SCONE Modules |
|
103 |
+} |
|
104 |
+\details{ |
|
105 |
+"ADD DESCRIPTION" |
|
106 |
+} |
|
107 |
+ |