Browse code

add colSummarize* functions

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/preprocessCore@27190 bc3139a8-67e5-0310-9ffc-ced21a209358

Ben Bolstad authored on 17/09/2007 02:00:13
Showing14 changed files

... ...
@@ -1,12 +1,12 @@
1 1
 Package: preprocessCore
2
-Version: 0.99.17
2
+Version: 0.99.18
3 3
 Title: A collection of pre-processing functions
4 4
 Author: Benjamin Milo Bolstad <bmb@bmbolstad.com>
5 5
 Maintainer: Benjamin Milo Bolstad <bmb@bmbolstad.com>
6 6
 Depends: methods
7 7
 Description: A library of core preprocessing routines 
8 8
 License: LGPL version 2 or newer
9
-Collate:  normalize.quantiles.R quantile_extensions.R rcModel.R init.R
9
+Collate:  normalize.quantiles.R quantile_extensions.R rcModel.R colSummarize.R init.R
10 10
 LazyLoad: yes
11 11
 biocViews: Infrastructure
12 12
 
13 13
new file mode 100644
... ...
@@ -0,0 +1,146 @@
1
+##
2
+## file: colSummarize.R
3
+##
4
+## Author: B. M. Bolstad <bmb@bmbolstad.com>
5
+##
6
+## History
7
+## Sept 15, 2007 - Initial verison
8
+##
9
+
10
+
11
+
12
+
13
+colSummarizeAvgLog <- function(y){
14
+  if (!is.matrix(y))
15
+    stop("argument should be matrix")
16
+
17
+  if (!is.double(y) & is.numeric(y))
18
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
19
+  else if (!is.numeric(y))
20
+    stop("argument should be numeric matrix")
21
+  
22
+  .Call("R_colSummarize_avg_log",y,PACKAGE="preprocessCore")
23
+}
24
+
25
+
26
+colSummarizeLogAvg <- function(y){
27
+  if (!is.matrix(y))
28
+    stop("argument should be matrix")
29
+
30
+  if (!is.double(y) & is.numeric(y))
31
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
32
+  else if (!is.numeric(y))
33
+    stop("argument should be numeric matrix")
34
+  
35
+  .Call("R_colSummarize_log_avg",y,PACKAGE="preprocessCore")
36
+}
37
+
38
+
39
+colSummarizeAvg <- function(y){
40
+  if (!is.matrix(y))
41
+    stop("argument should be matrix")
42
+
43
+  if (!is.double(y) & is.numeric(y))
44
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
45
+  else if (!is.numeric(y))
46
+    stop("argument should be numeric matrix")
47
+  
48
+  .Call("R_colSummarize_avg",y,PACKAGE="preprocessCore")
49
+}
50
+
51
+
52
+colSummarizeLogMedian <- function(y){
53
+  if (!is.matrix(y))
54
+    stop("argument should be matrix")
55
+
56
+  if (!is.double(y) & is.numeric(y))
57
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
58
+  else if (!is.numeric(y))
59
+    stop("argument should be numeric matrix")
60
+  
61
+  .Call("R_colSummarize_log_median",y,PACKAGE="preprocessCore")
62
+}
63
+
64
+
65
+
66
+
67
+colSummarizeMedianLog <- function(y){
68
+  if (!is.matrix(y))
69
+    stop("argument should be matrix")
70
+
71
+  if (!is.double(y) & is.numeric(y))
72
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
73
+  else if (!is.numeric(y))
74
+    stop("argument should be numeric matrix")
75
+  
76
+  .Call("R_colSummarize_median_log",y,PACKAGE="preprocessCore")
77
+}
78
+
79
+
80
+colSummarizeMedian <- function(y){
81
+  if (!is.matrix(y))
82
+    stop("argument should be matrix")
83
+
84
+  if (!is.double(y) & is.numeric(y))
85
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
86
+  else if (!is.numeric(y))
87
+    stop("argument should be numeric matrix")
88
+  
89
+  .Call("R_colSummarize_median",y,PACKAGE="preprocessCore")
90
+}
91
+
92
+
93
+
94
+colSummarizeBiweightLog <- function(y){
95
+  if (!is.matrix(y))
96
+    stop("argument should be matrix")
97
+
98
+  if (!is.double(y) & is.numeric(y))
99
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
100
+  else if (!is.numeric(y))
101
+    stop("argument should be numeric matrix")
102
+  
103
+  .Call("R_colSummarize_biweight_log",y,PACKAGE="preprocessCore")
104
+}
105
+
106
+colSummarizeBiweight <- function(y){
107
+  if (!is.matrix(y))
108
+    stop("argument should be matrix")
109
+
110
+  if (!is.double(y) & is.numeric(y))
111
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
112
+  else if (!is.numeric(y))
113
+    stop("argument should be numeric matrix")
114
+  
115
+  .Call("R_colSummarize_biweight",y,PACKAGE="preprocessCore")
116
+}
117
+
118
+
119
+
120
+colSummarizeMedianpolishLog <- function(y){
121
+  if (!is.matrix(y))
122
+    stop("argument should be matrix")
123
+
124
+  if (!is.double(y) & is.numeric(y))
125
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
126
+  else if (!is.numeric(y))
127
+    stop("argument should be numeric matrix")
128
+  
129
+  .Call("R_colSummarize_medianpolish_log",y,PACKAGE="preprocessCore")
130
+}
131
+
132
+
133
+colSummarizeMedianpolish <- function(y){
134
+  if (!is.matrix(y))
135
+    stop("argument should be matrix")
136
+
137
+  if (!is.double(y) & is.numeric(y))
138
+    y <- matrix(as.double(y),dim(y)[1],dim(y)[2])
139
+  else if (!is.numeric(y))
140
+    stop("argument should be numeric matrix")
141
+  
142
+  .Call("R_colSummarize_medianpolish",y,PACKAGE="preprocessCore")
143
+}
144
+
145
+
146
+
0 147
new file mode 100644
... ...
@@ -0,0 +1,78 @@
1
+\name{colSumamrize}
2
+\alias{colSummarizeAvg}
3
+\alias{colSummarizeAvgLog}
4
+\alias{colSummarizeBiweight}
5
+\alias{colSummarizeBiweightLog}
6
+\alias{colSummarizeLogAvg}
7
+\alias{colSummarizeLogMedian}
8
+\alias{colSummarizeMedian}
9
+\alias{colSummarizeMedianLog}
10
+\alias{colSummarizeMedianpolish}
11
+\alias{colSummarizeMedianpolishLog}
12
+}
13
+\title{Summarize the column of matrices}
14
+\description{Compute column wise summary values of a matrix.
15
+}
16
+\usage{
17
+colSummarizeAvg(y)
18
+colSummarizeAvgLog(y)
19
+colSummarizeBiweight(y)
20
+colSummarizeBiweightLog(y)
21
+colSummarizeLogAvg(y)
22
+colSummarizeLogMedian(y)
23
+colSummarizeMedian(y)
24
+colSummarizeMedianLog(y)
25
+colSummarizeMedianpolish(y)
26
+colSummarizeMedianpolishLog(y)
27
+
28
+}
29
+\arguments{
30
+  \item{y}{A numeric matrix}
31
+}
32
+\value{
33
+  A list with following items:
34
+  \item{Estimates}{Summary values for each column.}
35
+  \item{StdErrors}{Standard error estimates.}
36
+}
37
+\details{This groups of functions summarize the columns of a given
38
+  matrices.
39
+
40
+  \itemize{
41
+    \item{\code{colSummarizeAvg}}{Take means in column-wise manner}
42
+    \item{\code{colSummarizeAvgLog}}{\code{log2} transform the data and
43
+      then take means in column-wise manner}
44
+    \item{\code{colSummarizeBiweight}}{Summarize each column using a one
45
+      step Tukey Biweight procedure}
46
+    \item{\code{colSummarizeBiweightLog}}{\code{log2} transform the data
47
+      and then summarize each column using a one step Tuke Biweight
48
+      procedure}
49
+    \item{\code{colSummarizeLogAvg}}{Compute the mean of each column and
50
+      then \code{log2} transform it}
51
+    \item{\code{colSummarizeLogMedian}}{Compute the median of each
52
+      column and then \code{log2} transform it}
53
+    \item{\code{colSummarizeMedian}}{Compute the median of each column}
54
+    \item{\code{colSummarizeMedianLog}}{\code{log2} transform the data
55
+      and then summarize each column using the median}
56
+    \item{\code{colSummarizeMedianpolish}}{Use the median polish to
57
+      summarize each column, by also using a row effect (not returned)}
58
+    \item{\code{colSummarizeMedianpolishLog}}{\code{log2} transform the
59
+      data and then use the median polish to summarize each column, by
60
+      also using a row effect (not returned)}
61
+  }
62
+}
63
+\examples{
64
+y <- matrix(10+rnorm(100),20,5)
65
+
66
+colSummarizeAvg(y)
67
+colSummarizeAvgLog(y)
68
+colSummarizeBiweight(y)
69
+colSummarizeBiweightLog(y)
70
+colSummarizeLogAvg(y)
71
+colSummarizeLogMedian(y)
72
+colSummarizeMedian(y)
73
+colSummarizeMedianLog(y)
74
+colSummarizeMedianpolish(y)
75
+colSummarizeMedianpolishLog(y)
76
+}
77
+\author{B. M. Bolstad   \email{bmb@bmbolstad.com}}
78
+\keyword{univar}
0 79
\ No newline at end of file
... ...
@@ -11,7 +11,7 @@ rcModelWPLM(y, w)
11 11
 rcModelMedianPolish(y)
12 12
 }
13 13
 \arguments{
14
-  \item{y}{A matrix}
14
+  \item{y}{A numeric matrix}
15 15
   \item{w}{A matrix or vector of weights. These should be non-negative.}
16 16
 }
17 17
 \value{
18 18
new file mode 100644
... ...
@@ -0,0 +1,554 @@
1
+/*********************************************************************
2
+ **
3
+ ** file: R_rlm_interfaces.c
4
+ **
5
+ ** Aim: Code which provides .Call() interfaces to the column 
6
+ ** summarization code.
7
+ **
8
+ ** Copyright (C) 2007 Ben Bolstad
9
+ **
10
+ ** created by: B. M. Bolstad <bmb@bmbolstad.com>
11
+ ** 
12
+ ** created on: Sep 15, 2007
13
+ **
14
+ ** History
15
+ ** Sep 15, 2007 - Initial version
16
+ **
17
+ **
18
+ *********************************************************************/
19
+
20
+
21
+
22
+#include <R.h>
23
+#include <Rdefines.h>
24
+#include <Rmath.h>
25
+#include <Rinternals.h>
26
+
27
+#include "R_colSummarize.h"
28
+
29
+#include "avg_log.h"
30
+#include "log_avg.h"
31
+#include "avg.h"
32
+
33
+#include "log_median.h"
34
+#include "median_log.h"
35
+#include "median.h"
36
+
37
+#include "biweight.h"
38
+#include "medianpolish.h"
39
+
40
+SEXP R_colSummarize_avg_log(SEXP RMatrix){
41
+
42
+
43
+  SEXP R_return_value;
44
+  SEXP R_return_value_names;
45
+
46
+  SEXP R_summaries;
47
+  SEXP R_summaries_se;
48
+
49
+  
50
+  SEXP dim1;
51
+
52
+  double *matrix=NUMERIC_POINTER(RMatrix);
53
+  double *results, *resultsSE;
54
+  
55
+  int rows, cols;
56
+
57
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
58
+  rows = INTEGER(dim1)[0];
59
+  cols = INTEGER(dim1)[1];
60
+  UNPROTECT(1);
61
+
62
+
63
+  PROTECT(R_return_value = allocVector(VECSXP,2));
64
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
65
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
66
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
67
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
68
+  
69
+
70
+  results = NUMERIC_POINTER(R_summaries);
71
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
72
+
73
+  averagelog(matrix, rows, cols, results, resultsSE);
74
+  
75
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
76
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
77
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
78
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
79
+  UNPROTECT(1);
80
+
81
+  UNPROTECT(3);
82
+  return R_return_value;
83
+}
84
+
85
+
86
+
87
+
88
+
89
+SEXP R_colSummarize_log_avg(SEXP RMatrix){
90
+
91
+
92
+  SEXP R_return_value;
93
+  SEXP R_return_value_names;
94
+
95
+  SEXP R_summaries;
96
+  SEXP R_summaries_se;
97
+
98
+  
99
+  SEXP dim1;
100
+
101
+  double *matrix=NUMERIC_POINTER(RMatrix);
102
+  double *results, *resultsSE;
103
+  
104
+  int rows, cols;
105
+
106
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
107
+  rows = INTEGER(dim1)[0];
108
+  cols = INTEGER(dim1)[1];
109
+  UNPROTECT(1);
110
+
111
+
112
+  PROTECT(R_return_value = allocVector(VECSXP,2));
113
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
114
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
115
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
116
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
117
+  
118
+
119
+  results = NUMERIC_POINTER(R_summaries);
120
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
121
+
122
+  logaverage(matrix, rows, cols, results, resultsSE);
123
+  
124
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
125
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
126
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
127
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
128
+  UNPROTECT(1);
129
+
130
+  UNPROTECT(3);
131
+  return R_return_value;
132
+}
133
+
134
+
135
+
136
+
137
+SEXP R_colSummarize_avg(SEXP RMatrix){
138
+
139
+
140
+  SEXP R_return_value;
141
+  SEXP R_return_value_names;
142
+
143
+  SEXP R_summaries;
144
+  SEXP R_summaries_se;
145
+
146
+  
147
+  SEXP dim1;
148
+
149
+  double *matrix=NUMERIC_POINTER(RMatrix);
150
+  double *results, *resultsSE;
151
+  
152
+  int rows, cols;
153
+
154
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
155
+  rows = INTEGER(dim1)[0];
156
+  cols = INTEGER(dim1)[1];
157
+  UNPROTECT(1);
158
+
159
+
160
+  PROTECT(R_return_value = allocVector(VECSXP,2));
161
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
162
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
163
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
164
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
165
+  
166
+
167
+  results = NUMERIC_POINTER(R_summaries);
168
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
169
+
170
+  colaverage(matrix, rows, cols, results, resultsSE);
171
+  
172
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
173
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
174
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
175
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
176
+  UNPROTECT(1);
177
+
178
+  UNPROTECT(3);
179
+  return R_return_value;
180
+}
181
+
182
+
183
+
184
+
185
+
186
+
187
+
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+
198
+
199
+
200
+SEXP R_colSummarize_log_median(SEXP RMatrix){
201
+
202
+
203
+  SEXP R_return_value;
204
+  SEXP R_return_value_names;
205
+
206
+  SEXP R_summaries;
207
+  SEXP R_summaries_se;
208
+
209
+  
210
+  SEXP dim1;
211
+
212
+  double *matrix=NUMERIC_POINTER(RMatrix);
213
+  double *results, *resultsSE;
214
+  
215
+  int rows, cols;
216
+
217
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
218
+  rows = INTEGER(dim1)[0];
219
+  cols = INTEGER(dim1)[1];
220
+  UNPROTECT(1);
221
+
222
+
223
+  PROTECT(R_return_value = allocVector(VECSXP,2));
224
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
225
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
226
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
227
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
228
+  
229
+
230
+  results = NUMERIC_POINTER(R_summaries);
231
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
232
+
233
+  logmedian(matrix, rows, cols, results, resultsSE);
234
+  
235
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
236
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
237
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
238
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
239
+  UNPROTECT(1);
240
+
241
+  UNPROTECT(3);
242
+  return R_return_value;
243
+}
244
+
245
+
246
+
247
+SEXP R_colSummarize_median_log(SEXP RMatrix){
248
+
249
+
250
+  SEXP R_return_value;
251
+  SEXP R_return_value_names;
252
+
253
+  SEXP R_summaries;
254
+  SEXP R_summaries_se;
255
+
256
+  
257
+  SEXP dim1;
258
+
259
+  double *matrix=NUMERIC_POINTER(RMatrix);
260
+  double *results, *resultsSE;
261
+  
262
+  int rows, cols;
263
+
264
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
265
+  rows = INTEGER(dim1)[0];
266
+  cols = INTEGER(dim1)[1];
267
+  UNPROTECT(1);
268
+
269
+
270
+  PROTECT(R_return_value = allocVector(VECSXP,2));
271
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
272
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
273
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
274
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
275
+  
276
+
277
+  results = NUMERIC_POINTER(R_summaries);
278
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
279
+
280
+  medianlog(matrix, rows, cols, results, resultsSE);
281
+  
282
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
283
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
284
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
285
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
286
+  UNPROTECT(1);
287
+
288
+  UNPROTECT(3);
289
+  return R_return_value;
290
+}
291
+
292
+
293
+
294
+
295
+SEXP R_colSummarize_median(SEXP RMatrix){
296
+
297
+
298
+  SEXP R_return_value;
299
+  SEXP R_return_value_names;
300
+
301
+  SEXP R_summaries;
302
+  SEXP R_summaries_se;
303
+
304
+  
305
+  SEXP dim1;
306
+
307
+  double *matrix=NUMERIC_POINTER(RMatrix);
308
+  double *results, *resultsSE;
309
+  
310
+  int rows, cols;
311
+
312
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
313
+  rows = INTEGER(dim1)[0];
314
+  cols = INTEGER(dim1)[1];
315
+  UNPROTECT(1);
316
+
317
+
318
+  PROTECT(R_return_value = allocVector(VECSXP,2));
319
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
320
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
321
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
322
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
323
+  
324
+
325
+  results = NUMERIC_POINTER(R_summaries);
326
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
327
+
328
+  colmedian(matrix, rows, cols, results, resultsSE);
329
+  
330
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
331
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
332
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
333
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
334
+  UNPROTECT(1);
335
+
336
+  UNPROTECT(3);
337
+  return R_return_value;
338
+}
339
+
340
+
341
+
342
+
343
+SEXP R_colSummarize_biweight_log(SEXP RMatrix){
344
+
345
+
346
+  SEXP R_return_value;
347
+  SEXP R_return_value_names;
348
+
349
+  SEXP R_summaries;
350
+  SEXP R_summaries_se;
351
+
352
+  
353
+  SEXP dim1;
354
+
355
+  double *matrix=NUMERIC_POINTER(RMatrix);
356
+  double *results, *resultsSE;
357
+  
358
+  int rows, cols;
359
+
360
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
361
+  rows = INTEGER(dim1)[0];
362
+  cols = INTEGER(dim1)[1];
363
+  UNPROTECT(1);
364
+
365
+
366
+  PROTECT(R_return_value = allocVector(VECSXP,2));
367
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
368
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
369
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
370
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
371
+  
372
+
373
+  results = NUMERIC_POINTER(R_summaries);
374
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
375
+
376
+  tukeybiweight(matrix, rows, cols, results, resultsSE);
377
+  
378
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
379
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
380
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
381
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
382
+  UNPROTECT(1);
383
+
384
+  UNPROTECT(3);
385
+  return R_return_value;
386
+}
387
+
388
+
389
+SEXP R_colSummarize_biweight(SEXP RMatrix){
390
+
391
+
392
+  SEXP R_return_value;
393
+  SEXP R_return_value_names;
394
+
395
+  SEXP R_summaries;
396
+  SEXP R_summaries_se;
397
+
398
+  
399
+  SEXP dim1;
400
+
401
+  double *matrix=NUMERIC_POINTER(RMatrix);
402
+  double *results, *resultsSE;
403
+  
404
+  int rows, cols;
405
+
406
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
407
+  rows = INTEGER(dim1)[0];
408
+  cols = INTEGER(dim1)[1];
409
+  UNPROTECT(1);
410
+
411
+
412
+  PROTECT(R_return_value = allocVector(VECSXP,2));
413
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
414
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
415
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
416
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
417
+  
418
+
419
+  results = NUMERIC_POINTER(R_summaries);
420
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
421
+
422
+  tukeybiweight_no_log(matrix, rows, cols, results, resultsSE);
423
+  
424
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
425
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
426
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
427
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
428
+  UNPROTECT(1);
429
+
430
+  UNPROTECT(3);
431
+  return R_return_value;
432
+}
433
+
434
+
435
+
436
+
437
+
438
+
439
+
440
+
441
+
442
+
443
+SEXP R_colSummarize_medianpolish_log(SEXP RMatrix){
444
+
445
+
446
+  SEXP R_return_value;
447
+  SEXP R_return_value_names;
448
+
449
+  SEXP R_summaries;
450
+  SEXP R_summaries_se;
451
+
452
+  
453
+  SEXP dim1;
454
+
455
+  double *matrix=NUMERIC_POINTER(RMatrix);
456
+  double *results, *resultsSE;
457
+  
458
+  double *resids;
459
+
460
+  
461
+  int rows, cols;
462
+
463
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
464
+  rows = INTEGER(dim1)[0];
465
+  cols = INTEGER(dim1)[1];
466
+  UNPROTECT(1);
467
+
468
+
469
+  PROTECT(R_return_value = allocVector(VECSXP,2));
470
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
471
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
472
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
473
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
474
+  
475
+
476
+  results = NUMERIC_POINTER(R_summaries);
477
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
478
+
479
+  resids = Calloc(rows*cols, double);
480
+  
481
+  memcpy(resids, matrix, rows*cols*sizeof(double));
482
+  
483
+  median_polish_log2_no_copy(resids, rows, cols, results, resultsSE);
484
+  
485
+  Free(resids);
486
+  
487
+
488
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
489
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
490
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
491
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
492
+  UNPROTECT(1);
493
+
494
+  UNPROTECT(3);
495
+  return R_return_value;
496
+}
497
+
498
+
499
+
500
+
501
+SEXP R_colSummarize_medianpolish(SEXP RMatrix){
502
+
503
+
504
+  SEXP R_return_value;
505
+  SEXP R_return_value_names;
506
+
507
+  SEXP R_summaries;
508
+  SEXP R_summaries_se;
509
+
510
+  
511
+  SEXP dim1;
512
+
513
+  double *matrix=NUMERIC_POINTER(RMatrix);
514
+  double *results, *resultsSE;
515
+  
516
+  double *resids;
517
+
518
+  
519
+  int rows, cols;
520
+
521
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
522
+  rows = INTEGER(dim1)[0];
523
+  cols = INTEGER(dim1)[1];
524
+  UNPROTECT(1);
525
+
526
+
527
+  PROTECT(R_return_value = allocVector(VECSXP,2));
528
+  PROTECT(R_summaries = allocVector(REALSXP,cols));
529
+  PROTECT(R_summaries_se = allocVector(REALSXP,cols));
530
+  SET_VECTOR_ELT(R_return_value,0,R_summaries);
531
+  SET_VECTOR_ELT(R_return_value,1,R_summaries_se);
532
+  
533
+
534
+  results = NUMERIC_POINTER(R_summaries);
535
+  resultsSE = NUMERIC_POINTER(R_summaries_se);
536
+
537
+  resids = Calloc(rows*cols, double);
538
+  
539
+  memcpy(resids, matrix, rows*cols*sizeof(double));
540
+  
541
+  median_polish_no_copy(resids, rows, cols, results, resultsSE);
542
+  
543
+  Free(resids);
544
+  
545
+
546
+  PROTECT(R_return_value_names= allocVector(STRSXP,2));
547
+  SET_VECTOR_ELT(R_return_value_names,0,mkChar("Estimates"));
548
+  SET_VECTOR_ELT(R_return_value_names,1,mkChar("StdErrors"));
549
+  setAttrib(R_return_value, R_NamesSymbol,R_return_value_names);
550
+  UNPROTECT(1);
551
+
552
+  UNPROTECT(3);
553
+  return R_return_value;
554
+}
0 555
new file mode 100644
... ...
@@ -0,0 +1,16 @@
1
+#ifndef R_COLSUMMARIZE_H
2
+#define R_COLSUMMARIZE_H
3
+
4
+SEXP R_colSummarize_avg_log(SEXP RMatrix);
5
+SEXP R_colSummarize_log_avg(SEXP RMatrix);
6
+SEXP R_colSummarize_log_median(SEXP RMatrix);
7
+SEXP R_colSummarize_median_log(SEXP RMatrix);
8
+SEXP R_colSummarize_biweight_log(SEXP RMatrix);
9
+SEXP R_colSummarize_medianpolish_log(SEXP RMatrix);
10
+
11
+SEXP R_colSummarize_avg(SEXP RMatrix);
12
+SEXP R_colSummarize_median(SEXP RMatrix);
13
+SEXP R_colSummarize_biweight(SEXP RMatrix);
14
+SEXP R_colSummarize_medianpolish(SEXP RMatrix);
15
+
16
+#endif
0 17
new file mode 100644
... ...
@@ -0,0 +1,233 @@
1
+/************************************************************************
2
+ **
3
+ ** avg.c
4
+ **
5
+ ** created by: B. M. Bolstad   <bmb@bmbolstad.com>
6
+ ** created on: Sep 16, 2007  (but based on earlier work from Nov avg_log.c)
7
+ **
8
+ ** Copyright (C) 2007 Ben Bolstad
9
+ **
10
+ ** last modified: Sept 16, 2007
11
+ **
12
+ ** License: GPL V2 or later (same as the rest of the Affy package)
13
+ **
14
+ ** General discussion
15
+ **
16
+ ** Implement average summarization
17
+ **
18
+ ** History
19
+ ** Sep 16, 2007 - Initial version
20
+ **
21
+ ** 
22
+ **
23
+ ************************************************************************/
24
+
25
+#include "avg.h"
26
+
27
+#include <R.h> 
28
+#include <Rdefines.h>
29
+#include <Rmath.h>
30
+#include <Rinternals.h>
31
+
32
+#include <stdio.h>
33
+#include <stdlib.h>
34
+#include <math.h>
35
+
36
+/***************************************************************************
37
+ **
38
+ ** double AvgLog(double *x, int length)
39
+ **
40
+ ** double *x - a vector of PM intensities  (previously log2 transformed)
41
+ ** int length - length of *x
42
+ **
43
+ ** take the average of log2 PM intensities.
44
+ **
45
+ ***************************************************************************/
46
+
47
+static double Avg(double *x, int length){
48
+  int i;
49
+  double sum = 0.0;
50
+  double mean = 0.0;
51
+
52
+  for (i=0; i < length; i++){
53
+    sum = sum + x[i];
54
+  }
55
+  
56
+  mean = sum/(double)length;
57
+
58
+  return (mean);    
59
+}
60
+
61
+/***************************************************************************
62
+ **
63
+ ** static double AvgLogSE(double *x, int length)
64
+ **
65
+ ** double *x - a vector of PM intensities (previously log2 transformed)
66
+ ** double mean - the mean of x computed using AvgLog above
67
+ ** int length - length of *x
68
+ **
69
+ ** compute the standard error of the average of log2 PM intensities.
70
+ ** 
71
+ **
72
+ ***************************************************************************/
73
+
74
+static double AvgSE(double *x, double mean, int length){
75
+  int i;
76
+  double sum = 0.0;
77
+
78
+  for (i=0; i < length; i++){
79
+    sum = sum + (x[i]- mean)*(x[i] - mean);
80
+  }
81
+  
82
+  sum = sqrt(sum/(double)(length -1));
83
+  sum = sum/sqrt((double)length);
84
+
85
+  return (sum);    
86
+}
87
+
88
+
89
+void colaverage_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){
90
+  int i,j;
91
+
92
+  for (j = 0; j < cols; j++){
93
+    results[j] = Avg(&data[j*rows],rows);
94
+    resultsSE[j] = AvgSE(&data[j*rows],results[j],rows);
95
+  } 
96
+}
97
+
98
+
99
+/***************************************************************************
100
+ ** 
101
+ ** void average(double *data, int rows, int cols, double *results, double *resultsSE)
102
+ **
103
+ ** aim: given a data matrix of probe intensities, compute averages in column wise manner 
104
+ **      
105
+ **
106
+ ** double *data - Probe intensity matrix
107
+ ** int rows - number of rows in matrix *data (probes)
108
+ ** int cols - number of cols in matrix *data (chips)
109
+ ** int *cur_rows - indicies of rows corresponding to current probeset
110
+ ** double *results - already allocated location to store expression measures (cols length)
111
+ ** int nprobes - number of probes in current probeset.
112
+ **
113
+ ***************************************************************************/
114
+
115
+
116
+void colaverage(double *data, int rows, int cols, double *results, double *resultsSE){
117
+  int i,j;
118
+  double *z = Calloc(rows,double);
119
+
120
+  for (j = 0; j < cols; j++){
121
+    for (i =0; i < rows; i++){
122
+      z[i] = data[j*rows + i];  
123
+    }
124
+    results[j] = Avg(z,rows);
125
+    resultsSE[j] = AvgSE(z,results[j],rows);
126
+  } 
127
+  Free(z);
128
+
129
+}
130
+
131
+
132
+
133
+
134
+/***************************************************************************
135
+ **
136
+ ** double Average(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
137
+ **
138
+ ** aim: given a data matrix of probe intensities, and a list of rows in the matrix 
139
+ **      corresponding to a single probeset, compute average log2 expression measure. 
140
+ **      Note that data is a probes by chips matrix.
141
+ **
142
+ ** double *data - Probe intensity matrix
143
+ ** int rows - number of rows in matrix *data (probes)
144
+ ** int cols - number of cols in matrix *data (chips)
145
+ ** int *cur_rows - indicies of rows corresponding to current probeset
146
+ ** double *results - already allocated location to store expression measures (cols length)
147
+ ** int nprobes - number of probes in current probeset.
148
+ **
149
+ ***************************************************************************/
150
+
151
+/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix 
152
+ *      corresponding to a single probeset, compute average log2 expression measure. 
153
+ *      Note that data is a probes by chips matrix. Also compute SE estimates
154
+ *
155
+ * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
156
+ * @param rows the number of rows in the matrix 
157
+ * @param cols the number of columns in the matrix
158
+ * @param cur_rows a vector containing row indices to use
159
+ * @param results pre-allocated space to store output log2 averages. Should be of length cols
160
+ * @param nprobes number of probes in current set
161
+ * @param resultsSE pre-allocated space to store SE of log2 averages. Should be of length cols
162
+ *
163
+ *  
164
+ */
165
+
166
+void ColAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){
167
+  int i,j;
168
+  double *z = Calloc(nprobes*cols,double);
169
+
170
+  for (j = 0; j < cols; j++){
171
+    for (i =0; i < nprobes; i++){
172
+      z[j*nprobes + i] = data[j*rows + cur_rows[i]];  
173
+    }
174
+  } 
175
+  
176
+  for (j=0; j < cols; j++){
177
+    results[j] = Avg(&z[j*nprobes],nprobes);
178
+    resultsSE[j] = AvgSE(&z[j*nprobes],results[j],nprobes);
179
+  }
180
+
181
+  Free(z);
182
+}
183
+
184
+
185
+/***************************************************************************
186
+ **
187
+ ** double AverageLog(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
188
+ **
189
+ ** aim: given a data matrix of probe intensities, and a list of rows in the matrix 
190
+ **      corresponding to a single probeset, compute average log2 expression measure. 
191
+ **      Note that data is a probes by chips matrix.
192
+ **
193
+ ** double *data - Probe intensity matrix
194
+ ** int rows - number of rows in matrix *data (probes)
195
+ ** int cols - number of cols in matrix *data (chips)
196
+ ** int *cur_rows - indicies of rows corresponding to current probeset
197
+ ** double *results - already allocated location to store expression measures (cols length)
198
+ ** int nprobes - number of probes in current probeset.
199
+ **
200
+ ***************************************************************************/
201
+
202
+/*! \brief Given a data matrix of probe intensities, and a list of rows in the matrix 
203
+ *      corresponding to a single probeset, compute average log2 expression measure. 
204
+ *      Note that data is a probes by chips matrix. 
205
+ *
206
+ * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
207
+ * @param rows the number of rows in the matrix 
208
+ * @param cols the number of columns in the matrix
209
+ * @param cur_rows a vector containing row indices to use
210
+ * @param results pre-allocated space to store output log2 averages. Should be of length cols
211
+ * @param nprobes number of probes in current set
212
+ *
213
+ *  
214
+ */
215
+
216
+void ColAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){
217
+  int i,j;
218
+  double *z = Calloc(nprobes*cols,double);
219
+
220
+  for (j = 0; j < cols; j++){
221
+    for (i =0; i < nprobes; i++){
222
+      z[j*nprobes + i] = data[j*rows + cur_rows[i]];  
223
+    }
224
+  } 
225
+  
226
+  for (j=0; j < cols; j++){
227
+    results[j] = Avg(&z[j*nprobes],nprobes);
228
+  }
229
+  Free(z);
230
+}
231
+
232
+
233
+
0 234
new file mode 100644
... ...
@@ -0,0 +1,10 @@
1
+#ifndef AVG_H
2
+#define AVG_H
3
+
4
+void ColAverage(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
5
+void ColAverage_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);
6
+
7
+void colaverage_no_copy(double *data, int rows, int cols, double *results, double *resultsSE);
8
+void colaverage(double *data, int rows, int cols, double *results, double *resultsSE);
9
+
10
+#endif
... ...
@@ -24,6 +24,7 @@
24 24
  ** Oct 10, 2003 - added threestepPLM version of this summary.
25 25
  ** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. Add doxygen style documentation
26 26
  ** May 26, 2007 - fix memory leak in average_log. add additional interfaces
27
+ ** Sep 16, 2007 - fix error in how StdError is computed
27 28
  **
28 29
  ************************************************************************/
29 30
 
... ...
@@ -84,7 +85,8 @@ static double AvgLogSE(double *x, double mean, int length){
84 85
     sum = sum + (x[i]- mean)*(x[i] - mean);
85 86
   }
86 87
   
87
-  sum = sqrt(sum/(double)length);
88
+  sum = sqrt(sum/(double)(length-1));
89
+  sum = sum/sqrt((double)length);
88 90
 
89 91
   return (sum);    
90 92
 }
... ...
@@ -26,6 +26,7 @@
26 26
  ** Oct 10, 2003 - added in PLM version
27 27
  ** Apr 5, 2004 - Change mallocs to Callocs
28 28
  ** May 19, 2007 - branch out of affyPLM into a new package preprocessCore, then restructure the code. Add doxygen style documentation
29
+ ** Sep 16, 2007 - fix bug in tukeybiweight
29 30
  **
30 31
  ************************************************************************/
31 32
 
... ...
@@ -192,7 +193,7 @@ void tukeybiweight(double *data, int rows, int cols, double *results, double *re
192 193
 
193 194
   for (j = 0; j < cols; j++){
194 195
     for (i =0; i < rows; i++){
195
-      z[j*rows + i] = log(data[j*rows + i])/log(2.0);  
196
+      z[i] = log(data[j*rows + i])/log(2.0);  
196 197
     }
197 198
     results[j] = Tukey_Biweight(z,rows);
198 199
     resultsSE[j] = Tukey_Biweight_SE(z,results[j],rows);
... ...
@@ -204,6 +205,40 @@ void tukeybiweight(double *data, int rows, int cols, double *results, double *re
204 205
 
205 206
 }
206 207
 
208
+
209
+
210
+
211
+void tukeybiweight_no_log(double *data, int rows, int cols, double *results, double *resultsSE){
212
+  int i,j;
213
+  double *z = Calloc(rows,double);
214
+
215
+  for (j = 0; j < cols; j++){
216
+    for (i =0; i < rows; i++){
217
+      z[i] = data[j*rows + i];  
218
+    }
219
+    results[j] = Tukey_Biweight(z,rows);
220
+    resultsSE[j] = Tukey_Biweight_SE(z,results[j],rows);
221
+  }
222
+  Free(z);
223
+
224
+
225
+
226
+
227
+}
228
+
229
+
230
+
231
+
232
+
233
+
234
+
235
+
236
+
237
+
238
+
239
+
240
+
241
+
207 242
 /**********************************************************************************
208 243
  **
209 244
  ** void tukeybiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
... ...
@@ -2,6 +2,7 @@
2 2
 #define BIWEIGHT_H 1
3 3
 
4 4
 void tukeybiweight(double *data, int rows, int cols, double *results, double *resultsSE);
5
+void tukeybiweight_no_log(double *data, int rows, int cols, double *results, double *resultsSE);
5 6
 void TukeyBiweight(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
6 7
 double Tukey_Biweight(double *x, int length);
7 8
 
... ...
@@ -16,11 +16,14 @@
16 16
 
17 17
 #include "qnorm.h"
18 18
 #include "medianpolish.h"
19
+
19 20
 #include "log_avg.h"
20 21
 #include "avg_log.h"
22
+#include "avg.h"
21 23
 
22 24
 #include "median_log.h"
23 25
 #include "log_median.h"
26
+#include "median.h"
24 27
 
25 28
 #include "biweight.h"
26 29
 #include "lm.h"
... ...
@@ -28,7 +31,7 @@
28 31
 #include "rlm_se.h"
29 32
 
30 33
 #include "R_rlm_interfaces.h"
31
-
34
+#include "R_colSummarize.h"
32 35
 
33 36
 #include <R_ext/Rdynload.h>
34 37
 #include <Rdefines.h>
... ...
@@ -50,7 +53,16 @@ static const R_CallMethodDef callMethods[]  = {
50 53
   {"R_rlm_rma_default_model",(DL_FUNC)&R_rlm_rma_default_model,3},
51 54
   {"R_wrlm_rma_default_model", (DL_FUNC)&R_wrlm_rma_default_model,4},
52 55
   {"R_medianpolish_rma_default_model", (DL_FUNC)&R_medianpolish_rma_default_model,1},
53
-  {NULL, NULL, 0}
56
+  {"R_colSummarize_avg_log", (DL_FUNC)&R_colSummarize_avg_log,1},  
57
+  {"R_colSummarize_log_avg", (DL_FUNC)&R_colSummarize_log_avg,1},
58
+  {"R_colSummarize_median_log", (DL_FUNC)&R_colSummarize_median_log,1},
59
+  {"R_colSummarize_log_median", (DL_FUNC)&R_colSummarize_log_median,1},
60
+  {"R_colSummarize_biweight_log", (DL_FUNC)&R_colSummarize_biweight_log,1},
61
+  {"R_colSummarize_medianpolish_log",(DL_FUNC)&R_colSummarize_medianpolish_log,1},  
62
+  {"R_colSummarize_avg",(DL_FUNC)&R_colSummarize_avg,1},
63
+  {"R_colSummarize_median",(DL_FUNC)&R_colSummarize_median,1},
64
+  {"R_colSummarize_biweight", (DL_FUNC)&R_colSummarize_biweight,1},
65
+  {"R_colSummarize_medianpolish",(DL_FUNC)&R_colSummarize_medianpolish,1},{NULL, NULL, 0}
54 66
   };
55 67
 
56 68
 void R_init_preprocessCore(DllInfo *info){
... ...
@@ -81,6 +93,11 @@ void R_init_preprocessCore(DllInfo *info){
81 93
   R_RegisterCCallable("preprocessCore","averagelog", (DL_FUNC)&averagelog);
82 94
   R_RegisterCCallable("preprocessCore","AverageLog_noSE", (DL_FUNC)&AverageLog_noSE);
83 95
 
96
+  R_RegisterCCallable("preprocessCore","ColAverage", (DL_FUNC)&ColAverage);
97
+  R_RegisterCCallable("preprocessCore","colaverage_no_copy", (DL_FUNC)&colaverage_no_copy);
98
+  R_RegisterCCallable("preprocessCore","colaverage", (DL_FUNC)&colaverage);
99
+  R_RegisterCCallable("preprocessCore","ColAverage_noSE", (DL_FUNC)&ColAverage_noSE);
100
+
84 101
   R_RegisterCCallable("preprocessCore","MedianLog", (DL_FUNC)&MedianLog);
85 102
   R_RegisterCCallable("preprocessCore","medianlog_no_copy", (DL_FUNC)&medianlog_no_copy);
86 103
   R_RegisterCCallable("preprocessCore","medianlog", (DL_FUNC)&medianlog);
... ...
@@ -90,6 +107,11 @@ void R_init_preprocessCore(DllInfo *info){
90 107
   R_RegisterCCallable("preprocessCore","logmedian_no_copy", (DL_FUNC)&logmedian_no_copy);
91 108
   R_RegisterCCallable("preprocessCore","logmedian", (DL_FUNC)&logmedian);
92 109
  
110
+  R_RegisterCCallable("preprocessCore","ColMedian", (DL_FUNC)&ColMedian);
111
+  R_RegisterCCallable("preprocessCore","colmedian_no_copy", (DL_FUNC)&colmedian_no_copy);
112
+  R_RegisterCCallable("preprocessCore","colmedian", (DL_FUNC)&colmedian);
113
+  R_RegisterCCallable("preprocessCore","ColMedian_noSE", (DL_FUNC)&ColMedian_noSE);
114
+
93 115
   R_RegisterCCallable("preprocessCore","logaverage", (DL_FUNC)&logaverage);
94 116
   R_RegisterCCallable("preprocessCore","LogAverage", (DL_FUNC)&LogAverage);
95 117
 
96 118
new file mode 100644
... ...
@@ -0,0 +1,153 @@
1
+/************************************************************************
2
+ **
3
+ ** median.c
4
+ **
5
+ ** created by: B. M. Bolstad   <bmb@bmbolstad.com>
6
+ ** created on: Feb 6, 2003  (but based on earlier work from median_log.c)
7
+ **
8
+ ** Copyright (C) 2007   Ben Bolstad
9
+ **
10
+ ** last modified: Sep 16, 2007
11
+ **
12
+ ** License: GPL V2 or later (same as the rest of the Affy package)
13
+ **
14
+ ** General discussion
15
+ **
16
+ ** Implement median log2 pm summarization.
17
+ **
18
+ ** Sep 16, 2007 - initial version
19
+ **
20
+ ************************************************************************/
21
+
22
+#include "rma_common.h"
23
+#include "median.h"
24
+
25
+#include <R.h> 
26
+#include <Rdefines.h>
27
+#include <Rmath.h>
28
+#include <Rinternals.h>
29
+
30
+#include <stdio.h>
31
+#include <stdlib.h>
32
+#include <math.h>
33
+
34
+
35
+/***************************************************************************
36
+ **
37
+ ** double MedianLog(double *x, int length)
38
+ **
39
+ ** double *x - a vector of PM intensities 
40
+ ** int length - length of *x
41
+ **
42
+ ** take the log2 of the median of PM intensities.
43
+ **
44
+ ***************************************************************************/
45
+
46
+static double colmedian_wrapper(double *x, int length){
47
+
48
+  double med = 0.0;
49
+  
50
+  med = median_nocopy(x,length);
51
+ 
52
+  return (med);    
53
+}
54
+
55
+/***************************************************************************
56
+ **
57
+ ** double MedianLogPM(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
58
+ **
59
+ ** aim: given a data matrix of probe intensities, and a list of rows in the matrix 
60
+ **      corresponding to a single probeset, compute log2 Median expression measure. 
61
+ **      Note that data is a probes by chips matrix.
62
+ **
63
+ ** double *data - Probe intensity matrix
64
+ ** int rows - number of rows in matrix *data (probes)
65
+ ** int cols - number of cols in matrix *data (chips)
66
+ ** int *cur_rows - indicies of rows corresponding to current probeset
67
+ ** double *results - already allocated location to store expression measures (cols length)
68
+ ** int nprobes - number of probes in current probeset.
69
+ **
70
+ ***************************************************************************/
71
+
72
+void ColMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE){
73
+  int i,j;
74
+  double *z = Calloc(nprobes*cols,double);
75
+
76
+  for (j = 0; j < cols; j++){
77
+    for (i =0; i < nprobes; i++){
78
+      z[j*nprobes + i] = data[j*rows + cur_rows[i]];  
79
+    }
80
+  } 
81
+  
82
+  for (j=0; j < cols; j++){
83
+    results[j] = colmedian_wrapper(&z[j*nprobes],nprobes); 
84
+    resultsSE[j] = R_NaReal;
85
+  }
86
+  Free(z);
87
+}
88
+
89
+
90
+
91
+/***************************************************************************
92
+ **
93
+ ** double MedianLogPM_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
94
+ **
95
+ ** aim: given a data matrix of probe intensities, and a list of rows in the matrix 
96
+ **      corresponding to a single probeset, compute log2 Median expression measure. 
97
+ **      Note that data is a probes by chips matrix.
98
+ **
99
+ ** double *data - Probe intensity matrix
100
+ ** int rows - number of rows in matrix *data (probes)
101
+ ** int cols - number of cols in matrix *data (chips)
102
+ ** int *cur_rows - indicies of rows corresponding to current probeset
103
+ ** double *results - already allocated location to store expression measures (cols length)
104
+ ** int nprobes - number of probes in current probeset.
105
+ **
106
+ ***************************************************************************/
107
+
108
+void ColMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes){
109
+  int i,j;
110
+  double *z = Calloc(nprobes*cols,double);
111
+
112
+  for (j = 0; j < cols; j++){
113
+    for (i =0; i < nprobes; i++){
114
+      z[j*nprobes + i] = data[j*rows + cur_rows[i]];  
115
+    }
116
+  } 
117
+  
118
+  for (j=0; j < cols; j++){
119
+    results[j] = colmedian_wrapper(&z[j*nprobes],nprobes);
120
+
121
+  }
122
+  Free(z);
123
+}
124
+
125
+
126
+
127
+void colmedian(double *data, int rows, int cols, double *results, double *resultsSE){
128
+  int i,j;
129
+  double *buffer = Calloc(rows, double);
130
+  
131
+  for (j=0; j < cols; j++){
132
+    for (i = 0; i < rows; i++){
133
+      buffer[i] = data[j*rows + i];
134
+    }
135
+    results[j] = colmedian_wrapper(buffer,rows); 
136
+    resultsSE[j] = R_NaReal;
137
+  }
138
+
139
+  Free(buffer);
140
+}
141
+
142
+
143
+
144
+
145
+void colmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE){
146
+  int i,j;
147
+    
148
+  for (j=0; j < cols; j++){
149
+    results[j] = colmedian_wrapper(&data[j*rows],rows); 
150
+    resultsSE[j] = R_NaReal;
151
+  }
152
+
153
+}
0 154
new file mode 100644
... ...
@@ -0,0 +1,15 @@
1
+#ifndef MEDIAN_H
2
+#define MEDIAN_H 1
3
+
4
+
5
+
6
+void ColMedian(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes, double *resultsSE);
7
+void ColMedian_noSE(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes);
8
+
9
+void colmedian(double *data, int rows, int cols, double *results, double *resultsSE);
10
+void colmedian_no_copy(double *data, int rows, int cols, double *results, double *resultsSE);
11
+
12
+
13
+
14
+
15
+#endif