git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/preprocessCore@27190 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,12 +1,12 @@ |
1 | 1 |
Package: preprocessCore |
2 |
-Version: 0.99.17 |
|
2 |
+Version: 0.99.18 |
|
3 | 3 |
Title: A collection of pre-processing functions |
4 | 4 |
Author: Benjamin Milo Bolstad <bmb@bmbolstad.com> |
5 | 5 |
Maintainer: Benjamin Milo Bolstad <bmb@bmbolstad.com> |
6 | 6 |
Depends: methods |
7 | 7 |
Description: A library of core preprocessing routines |
8 | 8 |
License: LGPL version 2 or newer |
9 |
-Collate: normalize.quantiles.R quantile_extensions.R rcModel.R init.R |
|
9 |
+Collate: normalize.quantiles.R quantile_extensions.R rcModel.R colSummarize.R init.R |
|
10 | 10 |
LazyLoad: yes |
11 | 11 |
biocViews: Infrastructure |
12 | 12 |
|
13 | 13 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,146 @@ |
1 |
+## |
|
2 |
+## file: colSummarize.R |
|
3 |
+## |
|
4 |
+## Author: B. M. Bolstad <bmb@bmbolstad.com> |
|
5 |
+## |
|
6 |
+## History |
|
7 |
+## Sept 15, 2007 - Initial verison |
|
8 |
+## |
|
9 |
+ |
|
10 |
+ |
|
11 |
+ |
|
12 |
+ |
|
13 |
+colSummarizeAvgLog <- function(y){ |
|
14 |
+ if (!is.matrix(y)) |
|
15 |
+ stop("argument should be matrix") |
|
16 |
+ |
|
17 |
+ if (!is.double(y) & is.numeric(y)) |
|
18 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
19 |
+ else if (!is.numeric(y)) |
|
20 |
+ stop("argument should be numeric matrix") |
|
21 |
+ |
|
22 |
+ .Call("R_colSummarize_avg_log",y,PACKAGE="preprocessCore") |
|
23 |
+} |
|
24 |
+ |
|
25 |
+ |
|
26 |
+colSummarizeLogAvg <- function(y){ |
|
27 |
+ if (!is.matrix(y)) |
|
28 |
+ stop("argument should be matrix") |
|
29 |
+ |
|
30 |
+ if (!is.double(y) & is.numeric(y)) |
|
31 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
32 |
+ else if (!is.numeric(y)) |
|
33 |
+ stop("argument should be numeric matrix") |
|
34 |
+ |
|
35 |
+ .Call("R_colSummarize_log_avg",y,PACKAGE="preprocessCore") |
|
36 |
+} |
|
37 |
+ |
|
38 |
+ |
|
39 |
+colSummarizeAvg <- function(y){ |
|
40 |
+ if (!is.matrix(y)) |
|
41 |
+ stop("argument should be matrix") |
|
42 |
+ |
|
43 |
+ if (!is.double(y) & is.numeric(y)) |
|
44 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
45 |
+ else if (!is.numeric(y)) |
|
46 |
+ stop("argument should be numeric matrix") |
|
47 |
+ |
|
48 |
+ .Call("R_colSummarize_avg",y,PACKAGE="preprocessCore") |
|
49 |
+} |
|
50 |
+ |
|
51 |
+ |
|
52 |
+colSummarizeLogMedian <- function(y){ |
|
53 |
+ if (!is.matrix(y)) |
|
54 |
+ stop("argument should be matrix") |
|
55 |
+ |
|
56 |
+ if (!is.double(y) & is.numeric(y)) |
|
57 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
58 |
+ else if (!is.numeric(y)) |
|
59 |
+ stop("argument should be numeric matrix") |
|
60 |
+ |
|
61 |
+ .Call("R_colSummarize_log_median",y,PACKAGE="preprocessCore") |
|
62 |
+} |
|
63 |
+ |
|
64 |
+ |
|
65 |
+ |
|
66 |
+ |
|
67 |
+colSummarizeMedianLog <- function(y){ |
|
68 |
+ if (!is.matrix(y)) |
|
69 |
+ stop("argument should be matrix") |
|
70 |
+ |
|
71 |
+ if (!is.double(y) & is.numeric(y)) |
|
72 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
73 |
+ else if (!is.numeric(y)) |
|
74 |
+ stop("argument should be numeric matrix") |
|
75 |
+ |
|
76 |
+ .Call("R_colSummarize_median_log",y,PACKAGE="preprocessCore") |
|
77 |
+} |
|
78 |
+ |
|
79 |
+ |
|
80 |
+colSummarizeMedian <- function(y){ |
|
81 |
+ if (!is.matrix(y)) |
|
82 |
+ stop("argument should be matrix") |
|
83 |
+ |
|
84 |
+ if (!is.double(y) & is.numeric(y)) |
|
85 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
86 |
+ else if (!is.numeric(y)) |
|
87 |
+ stop("argument should be numeric matrix") |
|
88 |
+ |
|
89 |
+ .Call("R_colSummarize_median",y,PACKAGE="preprocessCore") |
|
90 |
+} |
|
91 |
+ |
|
92 |
+ |
|
93 |
+ |
|
94 |
+colSummarizeBiweightLog <- function(y){ |
|
95 |
+ if (!is.matrix(y)) |
|
96 |
+ stop("argument should be matrix") |
|
97 |
+ |
|
98 |
+ if (!is.double(y) & is.numeric(y)) |
|
99 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
100 |
+ else if (!is.numeric(y)) |
|
101 |
+ stop("argument should be numeric matrix") |
|
102 |
+ |
|
103 |
+ .Call("R_colSummarize_biweight_log",y,PACKAGE="preprocessCore") |
|
104 |
+} |
|
105 |
+ |
|
106 |
+colSummarizeBiweight <- function(y){ |
|
107 |
+ if (!is.matrix(y)) |
|
108 |
+ stop("argument should be matrix") |
|
109 |
+ |
|
110 |
+ if (!is.double(y) & is.numeric(y)) |
|
111 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
112 |
+ else if (!is.numeric(y)) |
|
113 |
+ stop("argument should be numeric matrix") |
|
114 |
+ |
|
115 |
+ .Call("R_colSummarize_biweight",y,PACKAGE="preprocessCore") |
|
116 |
+} |
|
117 |
+ |
|
118 |
+ |
|
119 |
+ |
|
120 |
+colSummarizeMedianpolishLog <- function(y){ |
|
121 |
+ if (!is.matrix(y)) |
|
122 |
+ stop("argument should be matrix") |
|
123 |
+ |
|
124 |
+ if (!is.double(y) & is.numeric(y)) |
|
125 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
126 |
+ else if (!is.numeric(y)) |
|
127 |
+ stop("argument should be numeric matrix") |
|
128 |
+ |
|
129 |
+ .Call("R_colSummarize_medianpolish_log",y,PACKAGE="preprocessCore") |
|
130 |
+} |
|
131 |
+ |
|
132 |
+ |
|
133 |
+colSummarizeMedianpolish <- function(y){ |
|
134 |
+ if (!is.matrix(y)) |
|
135 |
+ stop("argument should be matrix") |
|
136 |
+ |
|
137 |
+ if (!is.double(y) & is.numeric(y)) |
|
138 |
+ y <- matrix(as.double(y),dim(y)[1],dim(y)[2]) |
|
139 |
+ else if (!is.numeric(y)) |
|
140 |
+ stop("argument should be numeric matrix") |
|
141 |
+ |
|
142 |
+ .Call("R_colSummarize_medianpolish",y,PACKAGE="preprocessCore") |
|
143 |
+} |
|
144 |
+ |
|
145 |
+ |
|
146 |
+ |
0 | 147 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,78 @@ |
1 |
+\name{colSumamrize} |
|
2 |
+\alias{colSummarizeAvg} |
|
3 |
+\alias{colSummarizeAvgLog} |
|
4 |
+\alias{colSummarizeBiweight} |
|
5 |
+\alias{colSummarizeBiweightLog} |
|
6 |
+\alias{colSummarizeLogAvg} |
|
7 |
+\alias{colSummarizeLogMedian} |
|
8 |
+\alias{colSummarizeMedian} |
|
9 |
+\alias{colSummarizeMedianLog} |
|
10 |
+\alias{colSummarizeMedianpolish} |
|
11 |
+\alias{colSummarizeMedianpolishLog} |
|
12 |
+} |
|
13 |
+\title{Summarize the column of matrices} |
|
14 |
+\description{Compute column wise summary values of a matrix. |
|
15 |
+} |
|
16 |
+\usage{ |
|
17 |
+colSummarizeAvg(y) |
|
18 |
+colSummarizeAvgLog(y) |
|
19 |
+colSummarizeBiweight(y) |
|
20 |
+colSummarizeBiweightLog(y) |
|
21 |
+colSummarizeLogAvg(y) |
|
22 |
+colSummarizeLogMedian(y) |
|
23 |
+colSummarizeMedian(y) |
|
24 |
+colSummarizeMedianLog(y) |
|
25 |
+colSummarizeMedianpolish(y) |
|
26 |
+colSummarizeMedianpolishLog(y) |
|
27 |
+ |
|
28 |
+} |
|
29 |
+\arguments{ |
|
30 |
+ \item{y}{A numeric matrix} |
|
31 |
+} |
|
32 |
+\value{ |
|
33 |
+ A list with following items: |
|
34 |
+ \item{Estimates}{Summary values for each column.} |
|
35 |
+ \item{StdErrors}{Standard error estimates.} |
|
36 |
+} |
|
37 |
+\details{This groups of functions summarize the columns of a given |
|
38 |
+ matrices. |
|
39 |
+ |
|
40 |
+ \itemize{ |
|
41 |
+ \item{\code{colSummarizeAvg}}{Take means in column-wise manner} |
|
42 |
+ \item{\code{colSummarizeAvgLog}}{\code{log2} transform the data and |
|
43 |
+ then take means in column-wise manner} |
|
44 |
+ \item{\code{colSummarizeBiweight}}{Summarize each column using a one |
|
45 |
+ step Tukey Biweight procedure} |
|
46 |
+ \item{\code{colSummarizeBiweightLog}}{\code{log2} transform the data |
|
47 |
+ and then summarize each column using a one step Tuke Biweight |
|
48 |
+ procedure} |
|
49 |
+ \item{\code{colSummarizeLogAvg}}{Compute the mean of each column and |
|
50 |
+ then \code{log2} transform it} |
|
51 |
+ \item{\code{colSummarizeLogMedian}}{Compute the median of each |
|
52 |
+ column and then \code{log2} transform it} |
|
53 |
+ \item{\code{colSummarizeMedian}}{Compute the median of each column} |
|
54 |
+ \item{\code{colSummarizeMedianLog}}{\code{log2} transform the data |
|
55 |
+ and then summarize each column using the median} |
|
56 |
+ \item{\code{colSummarizeMedianpolish}}{Use the median polish to |
|
57 |
+ summarize each column, by also using a row effect (not returned)} |
|
58 |
+ \item{\code{colSummarizeMedianpolishLog}}{\code{log2} transform the |
|
59 |
+ data and then use the median polish to summarize each column, by |
|
60 |
+ also using a row effect (not returned)} |
|
61 |
+ } |
|
62 |
+} |
|
63 |
+\examples{ |
|
64 |
+y <- matrix(10+rnorm(100),20,5) |
|
65 |
+ |
|
66 |
+colSummarizeAvg(y) |
|
67 |
+colSummarizeAvgLog(y) |
|
68 |
+colSummarizeBiweight(y) |
|
69 |
+colSummarizeBiweightLog(y) |
|
70 |
+colSummarizeLogAvg(y) |
|
71 |
+colSummarizeLogMedian(y) |
|
72 |
+colSummarizeMedian(y) |
|
73 |
+colSummarizeMedianLog(y) |
|
74 |
+colSummarizeMedianpolish(y) |
|
75 |
+colSummarizeMedianpolishLog(y) |
|
76 |
+} |
|
77 |
+\author{B. M. Bolstad \email{bmb@bmbolstad.com}} |
|
78 |
+\keyword{univar} |
|
0 | 79 |
\ No newline at end of file |
18 | 18 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,554 @@ |
1 |
+/********************************************************************* |
|
2 |
+ ** |
|
3 |
+ ** file: R_rlm_interfaces.c |
|
4 |
+ ** |
|
5 |
+ ** Aim: Code which provides .Call() interfaces to the column |
|
6 |
+ ** summarization code. |
|
7 |
+ ** |
|
8 |
+ ** Copyright (C) 2007 Ben Bolstad |
|
9 |
+ ** |
|
10 |
+ ** created by: B. M. Bolstad <bmb@bmbolstad.com> |
|
11 |
+ ** |
|
12 |
+ ** created on: Sep 15, 2007 |
|
13 |
+ ** |
|
14 |
+ ** History |
|
15 |
+ ** Sep 15, 2007 - Initial version |
|
16 |
+ ** |
|
17 |
+ ** |
|
18 |
+ *********************************************************************/ |
|
19 |
+ |
|
20 |
+ |
|
21 |
+ |
|
22 |
+#include <R.h> |
|
23 |
+#include <Rdefines.h> |
|
24 |
+#include <Rmath.h> |
|
25 |
+#include <Rinternals.h> |
|
26 |
+ |
|
27 |
+#include "R_colSummarize.h" |
|
28 |
+ |
|
29 |
+#include "avg_log.h" |
|
30 |
+#include "log_avg.h" |
|
31 |
+#include "avg.h" |
|
32 |
+ |
|
33 |
+#include "log_median.h" |
|
34 |
+#include "median_log.h" |
|
35 |
+#include "median.h" |
|
36 |
+ |
|
37 |
+#include "biweight.h" |
|
38 |
+#include "medianpolish.h" |
|
39 |
+ |
|
40 |
+SEXP R_colSummarize_avg_log(SEXP RMatrix){ |
|
41 |
+ |
|
42 |
+ |
|
43 |
+ SEXP R_return_value; |
|
44 |
+ SEXP R_return_value_names; |
|
45 |
+ |
|
46 |
+ SEXP R_summaries; |
|
47 |
+ SEXP R_summaries_se; |
|
48 |
+ |
|
49 |
+ |
|
50 |
+ SEXP dim1; |
|
51 |
+ |
|
52 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
53 |
+ double *results, *resultsSE; |
|
54 |
+ |
|
55 |
+ int rows, cols; |
|
56 |
+ |
|
57 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
58 |
+ rows = INTEGER(dim1)[0]; |
|
59 |
+ cols = INTEGER(dim1)[1]; |
|
60 |
+ UNPROTECT(1); |
|
61 |
+ |
|
62 |
+ |
|
63 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
64 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
65 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
66 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
67 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
68 |
+ |
|
69 |
+ |
|
70 |
+ results = NUMERIC_POINTER(R_summaries); |
|
71 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
72 |
+ |
|
73 |
+ averagelog(matrix, rows, cols, results, resultsSE); |
|
74 |
+ |
|
75 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
76 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
77 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
78 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
79 |
+ UNPROTECT(1); |
|
80 |
+ |
|
81 |
+ UNPROTECT(3); |
|
82 |
+ return R_return_value; |
|
83 |
+} |
|
84 |
+ |
|
85 |
+ |
|
86 |
+ |
|
87 |
+ |
|
88 |
+ |
|
89 |
+SEXP R_colSummarize_log_avg(SEXP RMatrix){ |
|
90 |
+ |
|
91 |
+ |
|
92 |
+ SEXP R_return_value; |
|
93 |
+ SEXP R_return_value_names; |
|
94 |
+ |
|
95 |
+ SEXP R_summaries; |
|
96 |
+ SEXP R_summaries_se; |
|
97 |
+ |
|
98 |
+ |
|
99 |
+ SEXP dim1; |
|
100 |
+ |
|
101 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
102 |
+ double *results, *resultsSE; |
|
103 |
+ |
|
104 |
+ int rows, cols; |
|
105 |
+ |
|
106 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
107 |
+ rows = INTEGER(dim1)[0]; |
|
108 |
+ cols = INTEGER(dim1)[1]; |
|
109 |
+ UNPROTECT(1); |
|
110 |
+ |
|
111 |
+ |
|
112 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
113 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
114 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
115 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
116 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
117 |
+ |
|
118 |
+ |
|
119 |
+ results = NUMERIC_POINTER(R_summaries); |
|
120 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
121 |
+ |
|
122 |
+ logaverage(matrix, rows, cols, results, resultsSE); |
|
123 |
+ |
|
124 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
125 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
126 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
127 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
128 |
+ UNPROTECT(1); |
|
129 |
+ |
|
130 |
+ UNPROTECT(3); |
|
131 |
+ return R_return_value; |
|
132 |
+} |
|
133 |
+ |
|
134 |
+ |
|
135 |
+ |
|
136 |
+ |
|
137 |
+SEXP R_colSummarize_avg(SEXP RMatrix){ |
|
138 |
+ |
|
139 |
+ |
|
140 |
+ SEXP R_return_value; |
|
141 |
+ SEXP R_return_value_names; |
|
142 |
+ |
|
143 |
+ SEXP R_summaries; |
|
144 |
+ SEXP R_summaries_se; |
|
145 |
+ |
|
146 |
+ |
|
147 |
+ SEXP dim1; |
|
148 |
+ |
|
149 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
150 |
+ double *results, *resultsSE; |
|
151 |
+ |
|
152 |
+ int rows, cols; |
|
153 |
+ |
|
154 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
155 |
+ rows = INTEGER(dim1)[0]; |
|
156 |
+ cols = INTEGER(dim1)[1]; |
|
157 |
+ UNPROTECT(1); |
|
158 |
+ |
|
159 |
+ |
|
160 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
161 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
162 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
163 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
164 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
165 |
+ |
|
166 |
+ |
|
167 |
+ results = NUMERIC_POINTER(R_summaries); |
|
168 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
169 |
+ |
|
170 |
+ colaverage(matrix, rows, cols, results, resultsSE); |
|
171 |
+ |
|
172 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
173 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
174 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
175 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
176 |
+ UNPROTECT(1); |
|
177 |
+ |
|
178 |
+ UNPROTECT(3); |
|
179 |
+ return R_return_value; |
|
180 |
+} |
|
181 |
+ |
|
182 |
+ |
|
183 |
+ |
|
184 |
+ |
|
185 |
+ |
|
186 |
+ |
|
187 |
+ |
|
188 |
+ |
|
189 |
+ |
|
190 |
+ |
|
191 |
+ |
|
192 |
+ |
|
193 |
+ |
|
194 |
+ |
|
195 |
+ |
|
196 |
+ |
|
197 |
+ |
|
198 |
+ |
|
199 |
+ |
|
200 |
+SEXP R_colSummarize_log_median(SEXP RMatrix){ |
|
201 |
+ |
|
202 |
+ |
|
203 |
+ SEXP R_return_value; |
|
204 |
+ SEXP R_return_value_names; |
|
205 |
+ |
|
206 |
+ SEXP R_summaries; |
|
207 |
+ SEXP R_summaries_se; |
|
208 |
+ |
|
209 |
+ |
|
210 |
+ SEXP dim1; |
|
211 |
+ |
|
212 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
213 |
+ double *results, *resultsSE; |
|
214 |
+ |
|
215 |
+ int rows, cols; |
|
216 |
+ |
|
217 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
218 |
+ rows = INTEGER(dim1)[0]; |
|
219 |
+ cols = INTEGER(dim1)[1]; |
|
220 |
+ UNPROTECT(1); |
|
221 |
+ |
|
222 |
+ |
|
223 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
224 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
225 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
226 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
227 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
228 |
+ |
|
229 |
+ |
|
230 |
+ results = NUMERIC_POINTER(R_summaries); |
|
231 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
232 |
+ |
|
233 |
+ logmedian(matrix, rows, cols, results, resultsSE); |
|
234 |
+ |
|
235 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
236 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
237 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
238 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
239 |
+ UNPROTECT(1); |
|
240 |
+ |
|
241 |
+ UNPROTECT(3); |
|
242 |
+ return R_return_value; |
|
243 |
+} |
|
244 |
+ |
|
245 |
+ |
|
246 |
+ |
|
247 |
+SEXP R_colSummarize_median_log(SEXP RMatrix){ |
|
248 |
+ |
|
249 |
+ |
|
250 |
+ SEXP R_return_value; |
|
251 |
+ SEXP R_return_value_names; |
|
252 |
+ |
|
253 |
+ SEXP R_summaries; |
|
254 |
+ SEXP R_summaries_se; |
|
255 |
+ |
|
256 |
+ |
|
257 |
+ SEXP dim1; |
|
258 |
+ |
|
259 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
260 |
+ double *results, *resultsSE; |
|
261 |
+ |
|
262 |
+ int rows, cols; |
|
263 |
+ |
|
264 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
265 |
+ rows = INTEGER(dim1)[0]; |
|
266 |
+ cols = INTEGER(dim1)[1]; |
|
267 |
+ UNPROTECT(1); |
|
268 |
+ |
|
269 |
+ |
|
270 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
271 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
272 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
273 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
274 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
275 |
+ |
|
276 |
+ |
|
277 |
+ results = NUMERIC_POINTER(R_summaries); |
|
278 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
279 |
+ |
|
280 |
+ medianlog(matrix, rows, cols, results, resultsSE); |
|
281 |
+ |
|
282 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
283 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
284 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
285 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
286 |
+ UNPROTECT(1); |
|
287 |
+ |
|
288 |
+ UNPROTECT(3); |
|
289 |
+ return R_return_value; |
|
290 |
+} |
|
291 |
+ |
|
292 |
+ |
|
293 |
+ |
|
294 |
+ |
|
295 |
+SEXP R_colSummarize_median(SEXP RMatrix){ |
|
296 |
+ |
|
297 |
+ |
|
298 |
+ SEXP R_return_value; |
|
299 |
+ SEXP R_return_value_names; |
|
300 |
+ |
|
301 |
+ SEXP R_summaries; |
|
302 |
+ SEXP R_summaries_se; |
|
303 |
+ |
|
304 |
+ |
|
305 |
+ SEXP dim1; |
|
306 |
+ |
|
307 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
308 |
+ double *results, *resultsSE; |
|
309 |
+ |
|
310 |
+ int rows, cols; |
|
311 |
+ |
|
312 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
313 |
+ rows = INTEGER(dim1)[0]; |
|
314 |
+ cols = INTEGER(dim1)[1]; |
|
315 |
+ UNPROTECT(1); |
|
316 |
+ |
|
317 |
+ |
|
318 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
319 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
320 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
321 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
322 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
323 |
+ |
|
324 |
+ |
|
325 |
+ results = NUMERIC_POINTER(R_summaries); |
|
326 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
327 |
+ |
|
328 |
+ colmedian(matrix, rows, cols, results, resultsSE); |
|
329 |
+ |
|
330 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
331 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
332 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
333 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
334 |
+ UNPROTECT(1); |
|
335 |
+ |
|
336 |
+ UNPROTECT(3); |
|
337 |
+ return R_return_value; |
|
338 |
+} |
|
339 |
+ |
|
340 |
+ |
|
341 |
+ |
|
342 |
+ |
|
343 |
+SEXP R_colSummarize_biweight_log(SEXP RMatrix){ |
|
344 |
+ |
|
345 |
+ |
|
346 |
+ SEXP R_return_value; |
|
347 |
+ SEXP R_return_value_names; |
|
348 |
+ |
|
349 |
+ SEXP R_summaries; |
|
350 |
+ SEXP R_summaries_se; |
|
351 |
+ |
|
352 |
+ |
|
353 |
+ SEXP dim1; |
|
354 |
+ |
|
355 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
356 |
+ double *results, *resultsSE; |
|
357 |
+ |
|
358 |
+ int rows, cols; |
|
359 |
+ |
|
360 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
361 |
+ rows = INTEGER(dim1)[0]; |
|
362 |
+ cols = INTEGER(dim1)[1]; |
|
363 |
+ UNPROTECT(1); |
|
364 |
+ |
|
365 |
+ |
|
366 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
367 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
368 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
369 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
370 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
371 |
+ |
|
372 |
+ |
|
373 |
+ results = NUMERIC_POINTER(R_summaries); |
|
374 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
375 |
+ |
|
376 |
+ tukeybiweight(matrix, rows, cols, results, resultsSE); |
|
377 |
+ |
|
378 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
379 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
380 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
381 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
382 |
+ UNPROTECT(1); |
|
383 |
+ |
|
384 |
+ UNPROTECT(3); |
|
385 |
+ return R_return_value; |
|
386 |
+} |
|
387 |
+ |
|
388 |
+ |
|
389 |
+SEXP R_colSummarize_biweight(SEXP RMatrix){ |
|
390 |
+ |
|
391 |
+ |
|
392 |
+ SEXP R_return_value; |
|
393 |
+ SEXP R_return_value_names; |
|
394 |
+ |
|
395 |
+ SEXP R_summaries; |
|
396 |
+ SEXP R_summaries_se; |
|
397 |
+ |
|
398 |
+ |
|
399 |
+ SEXP dim1; |
|
400 |
+ |
|
401 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
402 |
+ double *results, *resultsSE; |
|
403 |
+ |
|
404 |
+ int rows, cols; |
|
405 |
+ |
|
406 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
407 |
+ rows = INTEGER(dim1)[0]; |
|
408 |
+ cols = INTEGER(dim1)[1]; |
|
409 |
+ UNPROTECT(1); |
|
410 |
+ |
|
411 |
+ |
|
412 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
413 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
414 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
415 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
416 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
417 |
+ |
|
418 |
+ |
|
419 |
+ results = NUMERIC_POINTER(R_summaries); |
|
420 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
421 |
+ |
|
422 |
+ tukeybiweight_no_log(matrix, rows, cols, results, resultsSE); |
|
423 |
+ |
|
424 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
425 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
426 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
427 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
428 |
+ UNPROTECT(1); |
|
429 |
+ |
|
430 |
+ UNPROTECT(3); |
|
431 |
+ return R_return_value; |
|
432 |
+} |
|
433 |
+ |
|
434 |
+ |
|
435 |
+ |
|
436 |
+ |
|
437 |
+ |
|
438 |
+ |
|
439 |
+ |
|
440 |
+ |
|
441 |
+ |
|
442 |
+ |
|
443 |
+SEXP R_colSummarize_medianpolish_log(SEXP RMatrix){ |
|
444 |
+ |
|
445 |
+ |
|
446 |
+ SEXP R_return_value; |
|
447 |
+ SEXP R_return_value_names; |
|
448 |
+ |
|
449 |
+ SEXP R_summaries; |
|
450 |
+ SEXP R_summaries_se; |
|
451 |
+ |
|
452 |
+ |
|
453 |
+ SEXP dim1; |
|
454 |
+ |
|
455 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
456 |
+ double *results, *resultsSE; |
|
457 |
+ |
|
458 |
+ double *resids; |
|
459 |
+ |
|
460 |
+ |
|
461 |
+ int rows, cols; |
|
462 |
+ |
|
463 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
464 |
+ rows = INTEGER(dim1)[0]; |
|
465 |
+ cols = INTEGER(dim1)[1]; |
|
466 |
+ UNPROTECT(1); |
|
467 |
+ |
|
468 |
+ |
|
469 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
470 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
471 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
472 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
473 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
474 |
+ |
|
475 |
+ |
|
476 |
+ results = NUMERIC_POINTER(R_summaries); |
|
477 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
478 |
+ |
|
479 |
+ resids = Calloc(rows*cols, double); |
|
480 |
+ |
|
481 |
+ memcpy(resids, matrix, rows*cols*sizeof(double)); |
|
482 |
+ |
|
483 |
+ median_polish_log2_no_copy(resids, rows, cols, results, resultsSE); |
|
484 |
+ |
|
485 |
+ Free(resids); |
|
486 |
+ |
|
487 |
+ |
|
488 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
489 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
490 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
491 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
492 |
+ UNPROTECT(1); |
|
493 |
+ |
|
494 |
+ UNPROTECT(3); |
|
495 |
+ return R_return_value; |
|
496 |
+} |
|
497 |
+ |
|
498 |
+ |
|
499 |
+ |
|
500 |
+ |
|
501 |
+SEXP R_colSummarize_medianpolish(SEXP RMatrix){ |
|
502 |
+ |
|
503 |
+ |
|
504 |
+ SEXP R_return_value; |
|
505 |
+ SEXP R_return_value_names; |
|
506 |
+ |
|
507 |
+ SEXP R_summaries; |
|
508 |
+ SEXP R_summaries_se; |
|
509 |
+ |
|
510 |
+ |
|
511 |
+ SEXP dim1; |
|
512 |
+ |
|
513 |
+ double *matrix=NUMERIC_POINTER(RMatrix); |
|
514 |
+ double *results, *resultsSE; |
|
515 |
+ |
|
516 |
+ double *resids; |
|
517 |
+ |
|
518 |
+ |
|
519 |
+ int rows, cols; |
|
520 |
+ |
|
521 |
+ PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol)); |
|
522 |
+ rows = INTEGER(dim1)[0]; |
|
523 |
+ cols = INTEGER(dim1)[1]; |
|
524 |
+ UNPROTECT(1); |
|
525 |
+ |
|
526 |
+ |
|
527 |
+ PROTECT(R_return_value = allocVector(VECSXP,2)); |
|
528 |
+ PROTECT(R_summaries = allocVector(REALSXP,cols)); |
|
529 |
+ PROTECT(R_summaries_se = allocVector(REALSXP,cols)); |
|
530 |
+ SET_VECTOR_ELT(R_return_value,0,R_summaries); |
|
531 |
+ SET_VECTOR_ELT(R_return_value,1,R_summaries_se); |
|
532 |
+ |
|
533 |
+ |
|
534 |
+ results = NUMERIC_POINTER(R_summaries); |
|
535 |
+ resultsSE = NUMERIC_POINTER(R_summaries_se); |
|
536 |
+ |
|
537 |
+ resids = Calloc(rows*cols, double); |
|
538 |
+ |
|
539 |
+ memcpy(resids, matrix, rows*cols*sizeof(double)); |
|
540 |
+ |
|
541 |
+ median_polish_no_copy(resids, rows, cols, results, resultsSE); |
|
542 |
+ |
|
543 |
+ Free(resids); |
|
544 |
+ |
|
545 |
+ |
|
546 |
+ PROTECT(R_return_value_names= allocVector(STRSXP,2)); |
|
547 |
+ SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates")); |
|
548 |
+ SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors")); |
|
549 |
+ setAttrib(R_return_value, R_NamesSymbol,R_return_value_names); |
|
550 |
+ UNPROTECT(1); |
|
551 |
+ |
|
552 |
+ UNPROTECT(3); |
|
553 |
+ return R_return_value; |
|
554 |
+} |
0 | 555 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,16 @@ |
1 |
+#ifndef R_COLSUMMARIZE_H |
|
2 |
+#define R_COLSUMMARIZE_H |
|
3 |
+ |
|
4 |
+SEXP R_colSummarize_avg_log(SEXP RMatrix); |
|
5 |
+SEXP R_colSummarize_log_avg(SEXP RMatrix); |
|
6 |
+SEXP R_colSummarize_log_median(SEXP RMatrix); |
|
7 |
+SEXP R_colSummarize_median_log(SEXP RMatrix); |
|
8 |
+SEXP R_colSummarize_biweight_log(SEXP RMatrix); |
|
9 |
+SEXP R_colSummarize_medianpolish_log(SEXP RMatrix); |
|
10 |
+ |
|
11 |
+SEXP R_colSummarize_avg(SEXP RMatrix); |
|
12 |
+SEXP R_colSummarize_median(SEXP RMatrix); |
|
13 |
+SEXP R_colSummarize_biweight(SEXP RMatrix); |
|
14 |
+SEXP R_colSummarize_medianpolish(SEXP RMatrix); |
|
15 |
+ |
|
16 |
+#endif |
0 | 17 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,233 @@ |
1 |
+/************************************************************************ |
|
2 |
+ ** |
|
3 |
+ ** avg.c |
|
4 |
+ ** |
|
5 |
+ ** created by: B. M. Bolstad <bmb@bmbolstad.com> |
|
6 |
+ ** created on: Sep 16, 2007 (but based on earlier work from Nov avg_log.c) |
|
7 |
+ ** |
|
8 |
+ ** Copyright (C) 2007 Ben Bolstad |
|
9 |
+ ** |
|
10 |
+ ** last modified: Sept 16, 2007 |
|
11 |
+ ** |
|
12 |
+ ** License: GPL V2 or later (same as the rest of the Affy package) |
|
13 |
+ ** |
|
14 |
+ ** General discussion |
|
15 |
+ ** |
|
16 |
+ ** Implement average summarization |
|
17 |
+ ** |
|
18 |
+ ** History |
|
19 |
+ ** Sep 16, 2007 - Initial version |
|
20 |
+ ** |
|
21 |
+ ** |
|
22 |
+ ** |
|
23 |
+ ************************************************************************/ |
|
24 |
+ |
|
25 |
+#include "avg.h" |
|
26 |
+ |
|
27 |
+#include <R.h> |
|
28 |
+#include <Rdefines.h> |
|
29 |
+#include <Rmath.h> |
|
30 |
+#include <Rinternals.h> |
|
31 |
+ |
|
32 |
+#include <stdio.h> |
|
33 |
+#include <stdlib.h> |
|
34 |
+#include <math.h> |
|
35 |
+ |
|
36 |
+/*************************************************************************** |
|
37 |
+ ** |
|
38 |
+ ** double AvgLog(double *x, int length) |
|
39 |
+ ** |
|
40 |
+ ** double *x - a vector of PM intensities (previously log2 transformed) |
|
41 |
+ ** int length - length of *x |
|
42 |
+ ** |
|
43 |
+ ** take the average of log2 PM intensities. |
|
44 |
+ ** |
|
45 |
+ ***************************************************************************/ |
|
46 |
+ |
|
47 |
+static double Avg(double *x, int length){ |
|
48 |
+ int i; |
|
49 |
+ double sum = 0.0; |
|
50 |
+ double mean = 0.0; |
|
51 |
+ |
|
52 |
+ for (i=0; i < length; i++){ |
|
53 |
+ sum = sum + x[i]; |
|
54 |
+ } |
|
55 |
+ |
|
56 |
+ mean = sum/(double)length; |
|
57 |
+ |
|
58 |
+ return (mean); |
|
59 |
+} |
|
60 |
+ |
|
61 |
+/*************************************************************************** |
|
62 |
+ ** |
|
63 |
+ ** static double AvgLogSE(double *x, int length) |
|
64 |
+ ** |
|
65 |
+ ** double *x - a vector of PM intensities (previously log2 transformed) |
|
66 |
+ ** double mean - the mean of x computed using AvgLog above |
|
67 |
+ ** int length - length of *x |
|
68 |
+ ** |
|
69 |
+ ** compute the standard error of the average of log2 PM intensities. |
|
70 |
+ ** |
|
71 |
+ ** |
|
72 |
+ ***************************************************************************/ |
|
73 |
+ |
|
74 |
+static double AvgSE(double *x, double mean, int length){ |
|
75 |
+ int i; |
|
76 |
+ double sum = 0.0; |
|
77 |
+ |
|
78 |
+ for (i=0; i < length; i++){ |
|
79 |
+ sum = sum + (x[i]- mean)*(x[i] - mean); |
|
80 |
+ } |
|
81 |
+ |
|
82 |
+ sum = sqrt(sum/(double)(length -1)); |
|
83 |
+ sum = sum/sqrt((double)length); |
|
84 |
+ |
|
85 |
+ return (sum); |
|
86 |
+} |
|
87 |
+ |
|
88 |
+ |
|
89 |
+void colaverage_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ |
|
90 |
+ int i,j; |
|
91 |
+ |
|
92 |
+ for (j = 0; j < cols; j++){ |
|
93 |
+ results[j] = Avg(&data[j*rows],rows); |
|
94 |
+ resultsSE[j] = AvgSE(&data[j*rows],results[j],rows); |
|
95 |
+ } |
|
96 |
+} |
|
97 |
+ |
|
98 |
+ |
|
99 |
+/*************************************************************************** |
|
100 |
+ ** |
|
101 |
+ ** void average(double *data, int rows, int cols, double *results, double *resultsSE) |
|
102 |
+ ** |
|
103 |
+ ** aim: given a data matrix of probe intensities, compute averages in column wise manner |
|
104 |
+ ** |
|
105 |
+ ** |
|
106 |
+ ** double *data - Probe intensity matrix |
|
107 |
+ ** int rows - number of rows in matrix *data (probes) |
|
108 |
+ ** int cols - number of cols in matrix *data (chips) |
|
109 |
+ ** int *cur_rows - indicies of rows corresponding to current probeset |
|
110 |
+ ** double *results - already allocated location to store expression measures (cols length) |
|
111 |
+ ** int nprobes - number of probes in current probeset. |
|
112 |
+ ** |
|
113 |
+ ***************************************************************************/ |
|
114 |
+ |
|
115 |
+ |
|
116 |
+void colaverage(double *data, int rows, int cols, double *results, double *resultsSE){ |
|
117 |
+ int i,j; |
|
118 |
+ double *z = Calloc(rows,double); |
|
119 |
+ |
|
120 |
+ for (j = 0; j < cols; j++){ |
|
121 |
+ for (i =0; i < rows; i++){ |
|
122 |
+ z[i] = data[j*rows + i]; |
|
123 |
+ } |
|
124 |
+ results[j] = Avg(z,rows); |
|
125 |
+ resultsSE[j] = AvgSE(z,results[j],rows); |
|
126 |
+ } |
|
127 |
+ Free(z); |
|
128 |
+ |
|
129 |
+} |
|
130 |
+ |
|
131 |
+ |
|
132 |
+ |
|
133 |
+ |
|
134 |
+/*************************************************************************** |
|
135 |
+ ** |
|
136 |
+ ** double Average(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) |
|
137 |
+ ** |
|
138 |
+ ** aim: given a data matrix of probe intensities, and a list of rows in the matrix |
|
139 |
+ ** corresponding to a single probeset, compute average log2 expression measure. |
|
140 |
+ ** Note that data is a probes by chips matrix. |
|
141 |
+ ** |
|
142 |
+ ** double *data - Probe intensity matrix |
|
143 |
+ ** int rows - number of rows in matrix *data (probes) |
|
144 |
+ ** int cols - number of cols in matrix *data (chips) |
|
145 |
+ ** int *cur_rows - indicies of rows corresponding to current probeset |
|
146 |
+ ** double *results - already allocated location to store expression measures (cols length) |
|
147 |
+ ** int nprobes - number of probes in current probeset. |
|
148 |
+ ** |
|
149 |
+ ***************************************************************************/ |
|
150 |
+ |
|
151 |
+/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix |
|
152 |
+ * corresponding to a single probeset, compute average log2 expression measure. |
|
153 |
+ * Note that data is a probes by chips matrix. Also compute SE estimates |
|
154 |
+ * |
|
155 |
+ * @param data a matrix containing data stored column-wise stored in rows*cols length of memory |
|
156 |
+ * @param rows the number of rows in the matrix |
|
157 |
+ * @param cols the number of columns in the matrix |
|
158 |
+ * @param cur_rows a vector containing row indices to use |
|
159 |
+ * @param results pre-allocated space to store output log2 averages. Should be of length cols |
|
160 |
+ * @param nprobes number of probes in current set |
|
161 |
+ * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols |
|
162 |
+ * |
|
163 |
+ * |
|
164 |
+ */ |
|
165 |
+ |
|
166 |
+void ColAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ |
|
167 |
+ int i,j; |
|
168 |
+ double *z = Calloc(nprobes*cols,double); |
|
169 |
+ |
|
170 |
+ for (j = 0; j < cols; j++){ |
|
171 |
+ for (i =0; i < nprobes; i++){ |
|
172 |
+ z[j*nprobes + i] = data[j*rows + cur_rows[i]]; |
|
173 |
+ } |
|
174 |
+ } |
|
175 |
+ |
|
176 |
+ for (j=0; j < cols; j++){ |
|
177 |
+ results[j] = Avg(&z[j*nprobes],nprobes); |
|
178 |
+ resultsSE[j] = AvgSE(&z[j*nprobes],results[j],nprobes); |
|
179 |
+ } |
|
180 |
+ |
|
181 |
+ Free(z); |
|
182 |
+} |
|
183 |
+ |
|
184 |
+ |
|
185 |
+/*************************************************************************** |
|
186 |
+ ** |
|
187 |
+ ** double AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) |
|
188 |
+ ** |
|
189 |
+ ** aim: given a data matrix of probe intensities, and a list of rows in the matrix |
|
190 |
+ ** corresponding to a single probeset, compute average log2 expression measure. |
|
191 |
+ ** Note that data is a probes by chips matrix. |
|
192 |
+ ** |
|
193 |
+ ** double *data - Probe intensity matrix |
|
194 |
+ ** int rows - number of rows in matrix *data (probes) |
|
195 |
+ ** int cols - number of cols in matrix *data (chips) |
|
196 |
+ ** int *cur_rows - indicies of rows corresponding to current probeset |
|
197 |
+ ** double *results - already allocated location to store expression measures (cols length) |
|
198 |
+ ** int nprobes - number of probes in current probeset. |
|
199 |
+ ** |
|
200 |
+ ***************************************************************************/ |
|
201 |
+ |
|
202 |
+/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix |
|
203 |
+ * corresponding to a single probeset, compute average log2 expression measure. |
|
204 |
+ * Note that data is a probes by chips matrix. |
|
205 |
+ * |
|
206 |
+ * @param data a matrix containing data stored column-wise stored in rows*cols length of memory |
|
207 |
+ * @param rows the number of rows in the matrix |
|
208 |
+ * @param cols the number of columns in the matrix |
|
209 |
+ * @param cur_rows a vector containing row indices to use |
|
210 |
+ * @param results pre-allocated space to store output log2 averages. Should be of length cols |
|
211 |
+ * @param nprobes number of probes in current set |
|
212 |
+ * |
|
213 |
+ * |
|
214 |
+ */ |
|
215 |
+ |
|
216 |
+void ColAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ |
|
217 |
+ int i,j; |
|
218 |
+ double *z = Calloc(nprobes*cols,double); |
|
219 |
+ |
|
220 |
+ for (j = 0; j < cols; j++){ |
|
221 |
+ for (i =0; i < nprobes; i++){ |
|
222 |
+ z[j*nprobes + i] = data[j*rows + cur_rows[i]]; |
|
223 |
+ } |
|
224 |
+ } |
|
225 |
+ |
|
226 |
+ for (j=0; j < cols; j++){ |
|
227 |
+ results[j] = Avg(&z[j*nprobes],nprobes); |
|
228 |
+ } |
|
229 |
+ Free(z); |
|
230 |
+} |
|
231 |
+ |
|
232 |
+ |
|
233 |
+ |
0 | 234 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,10 @@ |
1 |
+#ifndef AVG_H |
|
2 |
+#define AVG_H |
|
3 |
+ |
|
4 |
+void ColAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); |
|
5 |
+void ColAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); |
|
6 |
+ |
|
7 |
+void colaverage_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); |
|
8 |
+void colaverage(double *data, int rows, int cols, double *results, double *resultsSE); |
|
9 |
+ |
|
10 |
+#endif |
... | ... |
@@ -24,6 +24,7 @@ |
24 | 24 |
** Oct 10, 2003 - added threestepPLM version of this summary. |
25 | 25 |
** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. Add doxygen style documentation |
26 | 26 |
** May 26, 2007 - fix memory leak in average_log. add additional interfaces |
27 |
+ ** Sep 16, 2007 - fix error in how StdError is computed |
|
27 | 28 |
** |
28 | 29 |
************************************************************************/ |
29 | 30 |
|
... | ... |
@@ -84,7 +85,8 @@ static double AvgLogSE(double *x, double mean, int length){ |
84 | 85 |
sum = sum + (x[i]- mean)*(x[i] - mean); |
85 | 86 |
} |
86 | 87 |
|
87 |
- sum = sqrt(sum/(double)length); |
|
88 |
+ sum = sqrt(sum/(double)(length-1)); |
|
89 |
+ sum = sum/sqrt((double)length); |
|
88 | 90 |
|
89 | 91 |
return (sum); |
90 | 92 |
} |
... | ... |
@@ -26,6 +26,7 @@ |
26 | 26 |
** Oct 10, 2003 - added in PLM version |
27 | 27 |
** Apr 5, 2004 - Change mallocs to Callocs |
28 | 28 |
** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. Add doxygen style documentation |
29 |
+ ** Sep 16, 2007 - fix bug in tukeybiweight |
|
29 | 30 |
** |
30 | 31 |
************************************************************************/ |
31 | 32 |
|
... | ... |
@@ -192,7 +193,7 @@ void tukeybiweight(double *data, int rows, int cols, double *results, double *re |
192 | 193 |
|
193 | 194 |
for (j = 0; j < cols; j++){ |
194 | 195 |
for (i =0; i < rows; i++){ |
195 |
- z[j*rows + i] = log(data[j*rows + i])/log(2.0); |
|
196 |
+ z[i] = log(data[j*rows + i])/log(2.0); |
|
196 | 197 |
} |
197 | 198 |
results[j] = Tukey_Biweight(z,rows); |
198 | 199 |
resultsSE[j] = Tukey_Biweight_SE(z,results[j],rows); |
... | ... |
@@ -204,6 +205,40 @@ void tukeybiweight(double *data, int rows, int cols, double *results, double *re |
204 | 205 |
|
205 | 206 |
} |
206 | 207 |
|
208 |
+ |
|
209 |
+ |
|
210 |
+ |
|
211 |
+void tukeybiweight_no_log(double *data, int rows, int cols, double *results, double *resultsSE){ |
|
212 |
+ int i,j; |
|
213 |
+ double *z = Calloc(rows,double); |
|
214 |
+ |
|
215 |
+ for (j = 0; j < cols; j++){ |
|
216 |
+ for (i =0; i < rows; i++){ |
|
217 |
+ z[i] = data[j*rows + i]; |
|
218 |
+ } |
|
219 |
+ results[j] = Tukey_Biweight(z,rows); |
|
220 |
+ resultsSE[j] = Tukey_Biweight_SE(z,results[j],rows); |
|
221 |
+ } |
|
222 |
+ Free(z); |
|
223 |
+ |
|
224 |
+ |
|
225 |
+ |
|
226 |
+ |
|
227 |
+} |
|
228 |
+ |
|
229 |
+ |
|
230 |
+ |
|
231 |
+ |
|
232 |
+ |
|
233 |
+ |
|
234 |
+ |
|
235 |
+ |
|
236 |
+ |
|
237 |
+ |
|
238 |
+ |
|
239 |
+ |
|
240 |
+ |
|
241 |
+ |
|
207 | 242 |
/********************************************************************************** |
208 | 243 |
** |
209 | 244 |
** void tukeybiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) |
... | ... |
@@ -2,6 +2,7 @@ |
2 | 2 |
#define BIWEIGHT_H 1 |
3 | 3 |
|
4 | 4 |
void tukeybiweight(double *data, int rows, int cols, double *results, double *resultsSE); |
5 |
+void tukeybiweight_no_log(double *data, int rows, int cols, double *results, double *resultsSE); |
|
5 | 6 |
void TukeyBiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); |
6 | 7 |
double Tukey_Biweight(double *x, int length); |
7 | 8 |
|
... | ... |
@@ -16,11 +16,14 @@ |
16 | 16 |
|
17 | 17 |
#include "qnorm.h" |
18 | 18 |
#include "medianpolish.h" |
19 |
+ |
|
19 | 20 |
#include "log_avg.h" |
20 | 21 |
#include "avg_log.h" |
22 |
+#include "avg.h" |
|
21 | 23 |
|
22 | 24 |
#include "median_log.h" |
23 | 25 |
#include "log_median.h" |
26 |
+#include "median.h" |
|
24 | 27 |
|
25 | 28 |
#include "biweight.h" |
26 | 29 |
#include "lm.h" |
... | ... |
@@ -28,7 +31,7 @@ |
28 | 31 |
#include "rlm_se.h" |
29 | 32 |
|
30 | 33 |
#include "R_rlm_interfaces.h" |
31 |
- |
|
34 |
+#include "R_colSummarize.h" |
|
32 | 35 |
|
33 | 36 |
#include <R_ext/Rdynload.h> |
34 | 37 |
#include <Rdefines.h> |
... | ... |
@@ -50,7 +53,16 @@ static const R_CallMethodDef callMethods[] = { |
50 | 53 |
{"R_rlm_rma_default_model",(DL_FUNC)&R_rlm_rma_default_model,3}, |
51 | 54 |
{"R_wrlm_rma_default_model", (DL_FUNC)&R_wrlm_rma_default_model,4}, |
52 | 55 |
{"R_medianpolish_rma_default_model", (DL_FUNC)&R_medianpolish_rma_default_model,1}, |
53 |
- {NULL, NULL, 0} |
|
56 |
+ {"R_colSummarize_avg_log", (DL_FUNC)&R_colSummarize_avg_log,1}, |
|
57 |
+ {"R_colSummarize_log_avg", (DL_FUNC)&R_colSummarize_log_avg,1}, |
|
58 |
+ {"R_colSummarize_median_log", (DL_FUNC)&R_colSummarize_median_log,1}, |
|
59 |
+ {"R_colSummarize_log_median", (DL_FUNC)&R_colSummarize_log_median,1}, |
|
60 |
+ {"R_colSummarize_biweight_log", (DL_FUNC)&R_colSummarize_biweight_log,1}, |
|
61 |
+ {"R_colSummarize_medianpolish_log",(DL_FUNC)&R_colSummarize_medianpolish_log,1}, |
|
62 |
+ {"R_colSummarize_avg",(DL_FUNC)&R_colSummarize_avg,1}, |
|
63 |
+ {"R_colSummarize_median",(DL_FUNC)&R_colSummarize_median,1}, |
|
64 |
+ {"R_colSummarize_biweight", (DL_FUNC)&R_colSummarize_biweight,1}, |
|
65 |
+ {"R_colSummarize_medianpolish",(DL_FUNC)&R_colSummarize_medianpolish,1},{NULL, NULL, 0} |
|
54 | 66 |
}; |
55 | 67 |
|
56 | 68 |
void R_init_preprocessCore(DllInfo *info){ |
... | ... |
@@ -81,6 +93,11 @@ void R_init_preprocessCore(DllInfo *info){ |
81 | 93 |
R_RegisterCCallable("preprocessCore","averagelog", (DL_FUNC)&averagelog); |
82 | 94 |
R_RegisterCCallable("preprocessCore","AverageLog_noSE", (DL_FUNC)&AverageLog_noSE); |
83 | 95 |
|
96 |
+ R_RegisterCCallable("preprocessCore","ColAverage", (DL_FUNC)&ColAverage); |
|
97 |
+ R_RegisterCCallable("preprocessCore","colaverage_no_copy", (DL_FUNC)&colaverage_no_copy); |
|
98 |
+ R_RegisterCCallable("preprocessCore","colaverage", (DL_FUNC)&colaverage); |
|
99 |
+ R_RegisterCCallable("preprocessCore","ColAverage_noSE", (DL_FUNC)&ColAverage_noSE); |
|
100 |
+ |
|
84 | 101 |
R_RegisterCCallable("preprocessCore","MedianLog", (DL_FUNC)&MedianLog); |
85 | 102 |
R_RegisterCCallable("preprocessCore","medianlog_no_copy", (DL_FUNC)&medianlog_no_copy); |
86 | 103 |
R_RegisterCCallable("preprocessCore","medianlog", (DL_FUNC)&medianlog); |
... | ... |
@@ -90,6 +107,11 @@ void R_init_preprocessCore(DllInfo *info){ |
90 | 107 |
R_RegisterCCallable("preprocessCore","logmedian_no_copy", (DL_FUNC)&logmedian_no_copy); |
91 | 108 |
R_RegisterCCallable("preprocessCore","logmedian", (DL_FUNC)&logmedian); |
92 | 109 |
|
110 |
+ R_RegisterCCallable("preprocessCore","ColMedian", (DL_FUNC)&ColMedian); |
|
111 |
+ R_RegisterCCallable("preprocessCore","colmedian_no_copy", (DL_FUNC)&colmedian_no_copy); |
|
112 |
+ R_RegisterCCallable("preprocessCore","colmedian", (DL_FUNC)&colmedian); |
|
113 |
+ R_RegisterCCallable("preprocessCore","ColMedian_noSE", (DL_FUNC)&ColMedian_noSE); |
|
114 |
+ |
|
93 | 115 |
R_RegisterCCallable("preprocessCore","logaverage", (DL_FUNC)&logaverage); |
94 | 116 |
R_RegisterCCallable("preprocessCore","LogAverage", (DL_FUNC)&LogAverage); |
95 | 117 |
|
96 | 118 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,153 @@ |
1 |
+/************************************************************************ |
|
2 |
+ ** |
|
3 |
+ ** median.c |
|
4 |
+ ** |
|
5 |
+ ** created by: B. M. Bolstad <bmb@bmbolstad.com> |
|
6 |
+ ** created on: Feb 6, 2003 (but based on earlier work from median_log.c) |
|
7 |
+ ** |
|
8 |
+ ** Copyright (C) 2007 Ben Bolstad |
|
9 |
+ ** |
|
10 |
+ ** last modified: Sep 16, 2007 |
|
11 |
+ ** |
|
12 |
+ ** License: GPL V2 or later (same as the rest of the Affy package) |
|
13 |
+ ** |
|
14 |
+ ** General discussion |
|
15 |
+ ** |
|
16 |
+ ** Implement median log2 pm summarization. |
|
17 |
+ ** |
|
18 |
+ ** Sep 16, 2007 - initial version |
|
19 |
+ ** |
|
20 |
+ ************************************************************************/ |
|
21 |
+ |
|
22 |
+#include "rma_common.h" |
|
23 |
+#include "median.h" |
|
24 |
+ |
|
25 |
+#include <R.h> |
|
26 |
+#include <Rdefines.h> |
|
27 |
+#include <Rmath.h> |
|
28 |
+#include <Rinternals.h> |
|
29 |
+ |
|
30 |
+#include <stdio.h> |
|
31 |
+#include <stdlib.h> |
|
32 |
+#include <math.h> |
|
33 |
+ |
|
34 |
+ |
|
35 |
+/*************************************************************************** |
|
36 |
+ ** |
|
37 |
+ ** double MedianLog(double *x, int length) |
|
38 |
+ ** |
|
39 |
+ ** double *x - a vector of PM intensities |
|
40 |
+ ** int length - length of *x |
|
41 |
+ ** |
|
42 |
+ ** take the log2 of the median of PM intensities. |
|
43 |
+ ** |
|
44 |
+ ***************************************************************************/ |
|
45 |
+ |
|
46 |
+static double colmedian_wrapper(double *x, int length){ |
|
47 |
+ |
|
48 |
+ double med = 0.0; |
|
49 |
+ |
|
50 |
+ med = median_nocopy(x,length); |
|
51 |
+ |
|
52 |
+ return (med); |
|
53 |
+} |
|
54 |
+ |
|
55 |
+/*************************************************************************** |
|
56 |
+ ** |
|
57 |
+ ** double MedianLogPM(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) |
|
58 |
+ ** |
|
59 |
+ ** aim: given a data matrix of probe intensities, and a list of rows in the matrix |
|
60 |
+ ** corresponding to a single probeset, compute log2 Median expression measure. |
|
61 |
+ ** Note that data is a probes by chips matrix. |
|
62 |
+ ** |
|
63 |
+ ** double *data - Probe intensity matrix |
|
64 |
+ ** int rows - number of rows in matrix *data (probes) |
|
65 |
+ ** int cols - number of cols in matrix *data (chips) |
|
66 |
+ ** int *cur_rows - indicies of rows corresponding to current probeset |
|
67 |
+ ** double *results - already allocated location to store expression measures (cols length) |
|
68 |
+ ** int nprobes - number of probes in current probeset. |
|
69 |
+ ** |
|
70 |
+ ***************************************************************************/ |
|
71 |
+ |
|
72 |
+void ColMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){ |
|
73 |
+ int i,j; |
|
74 |
+ double *z = Calloc(nprobes*cols,double); |
|
75 |
+ |
|
76 |
+ for (j = 0; j < cols; j++){ |
|
77 |
+ for (i =0; i < nprobes; i++){ |
|
78 |
+ z[j*nprobes + i] = data[j*rows + cur_rows[i]]; |
|
79 |
+ } |
|
80 |
+ } |
|
81 |
+ |
|
82 |
+ for (j=0; j < cols; j++){ |
|
83 |
+ results[j] = colmedian_wrapper(&z[j*nprobes],nprobes); |
|
84 |
+ resultsSE[j] = R_NaReal; |
|
85 |
+ } |
|
86 |
+ Free(z); |
|
87 |
+} |
|
88 |
+ |
|
89 |
+ |
|
90 |
+ |
|
91 |
+/*************************************************************************** |
|
92 |
+ ** |
|
93 |
+ ** double MedianLogPM_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes) |
|
94 |
+ ** |
|
95 |
+ ** aim: given a data matrix of probe intensities, and a list of rows in the matrix |
|
96 |
+ ** corresponding to a single probeset, compute log2 Median expression measure. |
|
97 |
+ ** Note that data is a probes by chips matrix. |
|
98 |
+ ** |
|
99 |
+ ** double *data - Probe intensity matrix |
|
100 |
+ ** int rows - number of rows in matrix *data (probes) |
|
101 |
+ ** int cols - number of cols in matrix *data (chips) |
|
102 |
+ ** int *cur_rows - indicies of rows corresponding to current probeset |
|
103 |
+ ** double *results - already allocated location to store expression measures (cols length) |
|
104 |
+ ** int nprobes - number of probes in current probeset. |
|
105 |
+ ** |
|
106 |
+ ***************************************************************************/ |
|
107 |
+ |
|
108 |
+void ColMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){ |
|
109 |
+ int i,j; |
|
110 |
+ double *z = Calloc(nprobes*cols,double); |
|
111 |
+ |
|
112 |
+ for (j = 0; j < cols; j++){ |
|
113 |
+ for (i =0; i < nprobes; i++){ |
|
114 |
+ z[j*nprobes + i] = data[j*rows + cur_rows[i]]; |
|
115 |
+ } |
|
116 |
+ } |
|
117 |
+ |
|
118 |
+ for (j=0; j < cols; j++){ |
|
119 |
+ results[j] = colmedian_wrapper(&z[j*nprobes],nprobes); |
|
120 |
+ |
|
121 |
+ } |
|
122 |
+ Free(z); |
|
123 |
+} |
|
124 |
+ |
|
125 |
+ |
|
126 |
+ |
|
127 |
+void colmedian(double *data, int rows, int cols, double *results, double *resultsSE){ |
|
128 |
+ int i,j; |
|
129 |
+ double *buffer = Calloc(rows, double); |
|
130 |
+ |
|
131 |
+ for (j=0; j < cols; j++){ |
|
132 |
+ for (i = 0; i < rows; i++){ |
|
133 |
+ buffer[i] = data[j*rows + i]; |
|
134 |
+ } |
|
135 |
+ results[j] = colmedian_wrapper(buffer,rows); |
|
136 |
+ resultsSE[j] = R_NaReal; |
|
137 |
+ } |
|
138 |
+ |
|
139 |
+ Free(buffer); |
|
140 |
+} |
|
141 |
+ |
|
142 |
+ |
|
143 |
+ |
|
144 |
+ |
|
145 |
+void colmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){ |
|
146 |
+ int i,j; |
|
147 |
+ |
|
148 |
+ for (j=0; j < cols; j++){ |
|
149 |
+ results[j] = colmedian_wrapper(&data[j*rows],rows); |
|
150 |
+ resultsSE[j] = R_NaReal; |
|
151 |
+ } |
|
152 |
+ |
|
153 |
+} |
0 | 154 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,15 @@ |
1 |
+#ifndef MEDIAN_H |
|
2 |
+#define MEDIAN_H 1 |
|
3 |
+ |
|
4 |
+ |
|
5 |
+ |
|
6 |
+void ColMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE); |
|
7 |
+void ColMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes); |
|
8 |
+ |
|
9 |
+void colmedian(double *data, int rows, int cols, double *results, double *resultsSE); |
|
10 |
+void colmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE); |
|
11 |
+ |
|
12 |
+ |
|
13 |
+ |
|
14 |
+ |
|
15 |
+#endif |