Browse code

Commit made by the Bioconductor Git-SVN bridge. Consists of 3 commits.

Commit information:

Commit id: cdc1110d5ba0195ad1c817dac88ee20ed65fd06d

update version number to reflect that a number of changes have occured in the C level API. Mostly only affyPLM uses this API.

Committed by: Ben Bolstad
Author Name: Ben Bolstad
Commit date: 2014-09-03 19:15:52 -0700
Author date: 2014-09-03 19:15:52 -0700

Commit id: 4ce27cd0581d5340757258539cfc33552595254e

change kernel density estimation function interface

Committed by: Ben Bolstad
Author Name: Ben Bolstad
Commit date: 2014-09-03 19:15:20 -0700
Author date: 2014-09-03 19:15:20 -0700

Commit id: 6fcb1768abd96e097c8ebe60177d4896c304b2e7

Documentation updates and cleanup

Committed by: Ben Bolstad
Author Name: Ben Bolstad
Commit date: 2014-09-01 18:50:59 -0700
Author date: 2014-09-01 18:50:59 -0700


git-svn-id: https://hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/preprocessCore@93789 bc3139a8-67e5-0310-9ffc-ced21a209358

bolstad authored on 04/09/2014 02:17:14
Showing 7 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: preprocessCore
2
-Version: 1.27.1
2
+Version: 1.27.2
3 3
 Title: A collection of pre-processing functions
4 4
 Author: Benjamin Milo Bolstad <bmb@bmbolstad.com>
5 5
 Maintainer: Benjamin Milo Bolstad <bmb@bmbolstad.com>
... ...
@@ -4,13 +4,14 @@
4 4
 #ifndef WEIGHTEDKERNELDENSITY_STUBS_H
5 5
 #define WEIGHTEDKERNELDENSITY_STUBS_H 1
6 6
 
7
+#include <stddef.h>
7 8
 
8
-void KernelDensity(double *x, int *nxxx, double *weights, double *output, double *output_x, int *nout, int *kernel_fn, int *bandwidth_fn, double *bandwidth_adj){
9
+void KernelDensity(double *x, size_t nxxx, double *weights, double *output, double *output_x, size_t nout, int kernel_fn, int bandwidth_fn, double bandwidth_adj){
9 10
    
10
-  static void(*fun)(double*, int*,  double*, double*, double*, int *, int *, int *, double *) = NULL;
11
+  static void(*fun)(double*, size_t,  double*, double*, double*, size_t, int, int, double) = NULL;
11 12
   
12 13
   if (fun == NULL)
13
-    fun =  (void(*)(double*, int*,  double*, double*, double*, int *, int *, int *, double *))R_GetCCallable("preprocessCore","KernelDensity");
14
+    fun =  (void(*)(double*, size_t,  double*, double*, double*, size_t, int, int, double))R_GetCCallable("preprocessCore","KernelDensity");
14 15
   
15 16
   fun(x, nxxx, weights, output, output_x, nout, kernel_fn, bandwidth_fn, bandwidth_adj);
16 17
   
... ...
@@ -1 +1 @@
1
-void KernelDensity(double *x, int *nxxx, double *weights, double *output, double *output_x, int *nout, int *kernel_fn, int *bandwidth_fn, double *bandwidth_adj);
1
+void KernelDensity(double *x, size_t nxxx, double *weights, double *output, double *output_x, size_t nout, int kernel_fn, int bandwidth_fn, double bandwidth_adj);
... ...
@@ -5,7 +5,7 @@
5 5
  ** created by: B. M. Bolstad   <bmb@bmbolstad.com>
6 6
  ** created on: Feb 6, 2003  (but based on earlier work from Nov 2002)
7 7
  **
8
- ** Copyright (C) 2003-2007   Ben Bolstad
8
+ ** Copyright (C) 2003-2014   Ben Bolstad
9 9
  **
10 10
  ** last modified: Feb 6, 2003
11 11
  **
... ...
@@ -20,6 +20,7 @@
20 20
  ** Jul 23, 2003 - add SE parameter (but not yet implemented)
21 21
  ** Oct 10, 2003 - added PLM version
22 22
  ** Sept 9, 2007 - branch out of affyPLM into a new package preprocessCore
23
+ ** Sept, 2014 - Documentation clean up. Change to size_t where appropriate
23 24
  **
24 25
  ************************************************************************/
25 26
 
... ...
@@ -56,6 +57,8 @@ static double median_log(double *x, size_t length){
56 57
   return (med);    
57 58
 }
58 59
 
60
+
61
+
59 62
 /***************************************************************************
60 63
  **
61 64
  ** double MedianLogPM(double *data, int rows, int cols, int *cur_rows, double *results, int nprobes)
... ...
@@ -73,6 +76,24 @@ static double median_log(double *x, size_t length){
73 76
  **
74 77
  ***************************************************************************/
75 78
 
79
+/*! \brief  \f$\log_2\f$ transform the data and compute the median 
80
+ * 
81
+ *  Given a data matrix of probe intensities \f$\log_2\f$ transform it and then compute the median. Also compute SE of this estimate
82
+ *  on a column by column basis using only a specified subset of rows. Specifically, the median of each column is based on
83
+ *  \f$\log_2\f$ transformed data. The sample standard error is also computed. 
84
+ *    
85
+ *
86
+ * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
87
+ * @param rows the number of rows in the matrix 
88
+ * @param cols the number of columns in the matrix
89
+ * @param cur_rows indices specifying which rows in the matrix to use
90
+ * @param results pre-allocated space to store output log2 medians. Should be of length cols
91
+ * @param nprobes the number of rows in cur_rows
92
+ * @param resultsSE pre-allocated space to store SE of log2 medians. Should be of length cols
93
+ *
94
+ *  
95
+ */
96
+
76 97
 void MedianLog(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes, double *resultsSE){
77 98
 
78 99
   size_t i,j;
... ...
@@ -110,6 +131,21 @@ void MedianLog(double *data, size_t rows, size_t cols, int *cur_rows, double *re
110 131
  **
111 132
  ***************************************************************************/
112 133
 
134
+/*! \brief  \f$\log_2\f$ transform the data and compute the median 
135
+ * 
136
+ *  Given a data matrix of probe intensities \f$\log_2\f$ transform it and then compute the median on a column by column basis using only a specified subset of rows. 
137
+ *  Specifically, the median of each column is based on \f$\log_2\f$ transformed data.
138
+ *    
139
+ *
140
+ * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
141
+ * @param rows the number of rows in the matrix 
142
+ * @param cols the number of columns in the matrix
143
+ * @param cur_rows indices specifying which rows in the matrix to use
144
+ * @param results pre-allocated space to store output log2 medians. Should be of length cols
145
+ * @param nprobes the number of rows in cur_rows
146
+ *  
147
+ */
148
+
113 149
 void MedianLog_noSE(double *data, size_t rows, size_t cols, int *cur_rows, double *results, size_t nprobes){
114 150
 
115 151
   size_t i,j;
... ...
@@ -130,6 +166,23 @@ void MedianLog_noSE(double *data, size_t rows, size_t cols, int *cur_rows, doubl
130 166
 
131 167
 
132 168
 
169
+/*! \brief compute the median for each column of \f$\log_2\f$ transformed data.
170
+ * 
171
+ *  Given a data matrix of probe intensities \f$\log_2\f$ transform it then compute median of each column. Also produce the SE of this estimate
172
+ *  on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed.
173
+ *  The sample standard error is also computed. On output the data matrix will
174
+ *  be unchanged.
175
+ *    
176
+ *
177
+ * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
178
+ * @param rows the number of rows in the matrix 
179
+ * @param cols the number of columns in the matrix
180
+ * @param results pre-allocated space to store output log2 medians. Should be of length cols
181
+ * @param resultsSE pre-allocated space to store SE of log2 medians. Should be of length cols
182
+ *
183
+ *  
184
+ */
185
+
133 186
 void medianlog(double *data, size_t rows, size_t cols, double *results, double *resultsSE){
134 187
 
135 188
   size_t i,j;
... ...
@@ -148,6 +201,22 @@ void medianlog(double *data, size_t rows, size_t cols, double *results, double *
148 201
 
149 202
 
150 203
 
204
+/*! \brief compute the median for each column of \f$\log_2\f$ transformed data.
205
+ * 
206
+ *  Given a data matrix of probe intensities \f$\log_2\f$ transform it then compute median of each column. Also produce the SE of this estimate
207
+ *  on a column by column basis. Specifically, the median is computed for each column and then \f$\log_2\f$ transformed.
208
+ *  The sample standard error is also computed. On output the data matrix will
209
+ *  be changed.
210
+ *    
211
+ *
212
+ * @param data a matrix containing data stored column-wise stored in rows*cols length of memory
213
+ * @param rows the number of rows in the matrix 
214
+ * @param cols the number of columns in the matrix
215
+ * @param results pre-allocated space to store output log2 medians. Should be of length cols
216
+ * @param resultsSE pre-allocated space to store SE of log2 medians. Should be of length cols
217
+ *
218
+ *  
219
+ */
151 220
 
152 221
 void medianlog_no_copy(double *data, size_t rows, size_t cols, double *results, double *resultsSE){
153 222
 
... ...
@@ -119,7 +119,7 @@ static double max_density(double *z, size_t rows, size_t cols, size_t column){
119 119
   }
120 120
   
121 121
   
122
-  KernelDensity_lowmem(x,&rows,dens_y,dens_x,&npts);
122
+  KernelDensity_lowmem(x,rows,dens_y,dens_x,npts);
123 123
 
124 124
   max_y = find_max(dens_y,16384);
125 125
    
... ...
@@ -4,7 +4,7 @@
4 4
  **
5 5
  ** aim : compute weighted kernel density estimates
6 6
  **
7
- ** Copyright (C) 2003-2008 Ben Bolstad
7
+ ** Copyright (C) 2003-2014 Ben Bolstad
8 8
  **
9 9
  ** created on: Mar 24, 2003
10 10
  **
... ...
@@ -26,6 +26,7 @@
26 26
  ** Mar 24, 2005 - Add in IQR function to handle obscure cases.
27 27
  ** Mar 15, 2008 - add KernelDensity_lowmem. weightedkerneldensity.c is ported from affyPLM to preprocessCore
28 28
  ** Oct 31, 2011 - Add additional kernels. Allow non-power of 2 nout in KernelDensity. Fix error in bandwidth calculation
29
+ ** Sept, 2014 - Change function definition/declarations so size inputs are not pointers (and are actually size_t rather than int)
29 30
  **
30 31
  ****************************************************************************/
31 32
 
... ...
@@ -35,6 +36,8 @@
35 36
 #include <math.h>
36 37
 #include <stdlib.h>
37 38
 #include <string.h> /* For memcpy */
39
+#include <stddef.h>
40
+
38 41
 #include "rma_common.h"
39 42
 
40 43
 #include "weightedkerneldensity.h"
... ...
@@ -43,46 +46,46 @@
43 46
 
44 47
 /*****************************************************************************
45 48
  **
46
- ** void weighted_massdist(double *x, int nx, double *w, double *xlow, double *xhigh, double *y, int *ny)
49
+ ** void weighted_massdist(double *x, size_t nx, double *w, double xlow, double xhigh, double *y, size_t ny)
47 50
  **
48 51
  ** see AS R50 and AS 176  (AS = Applied Statistics)
49 52
  **
50 53
  ** idea is to discretize the data,  but have modified algorithm to put weights on each observation 
51 54
  **
52 55
  ** double *x - the data
53
- ** int nx - length of x
56
+ ** size_t nx - length of x
54 57
  ** double *w - weight for each one of x, vector should also be of length nx
55
- ** double *xlow - minimum value in x dimension
56
- ** double *xhigh - maximum value in x dimension
58
+ ** double xlow - minimum value in x dimension
59
+ ** double xhigh - maximum value in x dimension
57 60
  ** double *y - on output will contain discretation scheme of data
58
- ** int ny - length of y
61
+ ** size_t ny - length of y
59 62
  **
60 63
  ****************************************************************************/
61 64
 
62
-static void weighted_massdist(double *x, int *nx, double *w, double *xlow, double *xhigh, double *y, int *ny){
65
+static void weighted_massdist(double *x, size_t nx, double *w, double xlow, double xhigh, double *y, size_t ny){
63 66
   
64 67
   double fx, xdelta, xmass, xpos;
65
-  int i, ix, ixmax, ixmin;
68
+  size_t i, ix, ixmax, ixmin;
66 69
   
67 70
   ixmin = 0;
68
-  ixmax = *ny - 2;
71
+  ixmax = ny - 2;
69 72
   xmass = 0.0;
70
-  xdelta = (*xhigh - *xlow) / (*ny - 1);
73
+  xdelta = (xhigh - xlow) / (ny - 1);
71 74
   
72
-  for(i=0; i < *ny ; i++){
75
+  for(i=0; i < ny ; i++){
73 76
     y[i] = 0.0;
74 77
   }
75 78
 
76
-  for (i=0; i < *nx; i++){
79
+  for (i=0; i < nx; i++){
77 80
     xmass += w[i];
78 81
   }
79 82
   
80 83
   xmass = 1.0/xmass;
81 84
   /* Rprintf("%f\n",xmass);*/
82 85
 
83
-  for(i=0; i < *nx ; i++) {
86
+  for(i=0; i < nx ; i++) {
84 87
     if(R_FINITE(x[i])) {
85
-      xpos = (x[i] - *xlow) / xdelta;
88
+      xpos = (x[i] - xlow) / xdelta;
86 89
       ix = floor(xpos);
87 90
       fx = xpos - ix;
88 91
       if(ixmin <= ix && ix <= ixmax) {
... ...
@@ -98,45 +101,45 @@ static void weighted_massdist(double *x, int *nx, double *w, double *xlow, doubl
98 101
     }
99 102
   }
100 103
   
101
-  for(i=0; i < *ny; i++)
104
+  for(i=0; i < ny; i++)
102 105
     y[i] *= xmass;
103 106
   
104 107
 }
105 108
 
106 109
 /*****************************************************************************
107 110
  **
108
- ** void unweighted_massdist(double *x, int nx, double *xlow, double *xhigh, double *y, int *ny)
111
+ ** void unweighted_massdist(double *x, size_t nx, double xlow, double xhigh, double *y, size_t ny)
109 112
  **
110 113
  ** see AS R50 and AS 176  (AS = Applied Statistics)
111 114
  **
112 115
  ** idea is to discretize the data,  does not put weights on each observation 
113 116
  **
114 117
  ** double *x - the data
115
- ** int nx - length of x
118
+ ** size_t nx - length of x
116 119
  ** double *w - weight for each one of x, vector should also be of length nx
117
- ** double *xlow - minimum value in x dimension
118
- ** double *xhigh - maximum value in x dimension
120
+ ** double xlow - minimum value in x dimension
121
+ ** double xhigh - maximum value in x dimension
119 122
  ** double *y - on output will contain discretation scheme of data
120
- ** int ny - length of y
123
+ ** size_t ny - length of y
121 124
  **
122 125
  ****************************************************************************/
123 126
 
124
-static void unweighted_massdist(double *x, int *nx, double *xlow, double *xhigh, double *y, int *ny){
127
+static void unweighted_massdist(double *x, size_t nx, double xlow, double xhigh, double *y, size_t ny){
125 128
   
126 129
   double fx, xdelta, xpos;
127
-  int i, ix, ixmax, ixmin;
130
+  size_t i, ix, ixmax, ixmin;
128 131
   
129 132
   ixmin = 0;
130
-  ixmax = *ny - 2;
131
-  xdelta = (*xhigh - *xlow) / (*ny - 1);
133
+  ixmax = ny - 2;
134
+  xdelta = (xhigh - xlow) / (ny - 1);
132 135
   
133
-  for(i=0; i < *ny ; i++){
136
+  for(i=0; i < ny ; i++){
134 137
     y[i] = 0.0;
135 138
   }
136 139
 
137
-  for(i=0; i < *nx ; i++) {
140
+  for(i=0; i < nx ; i++) {
138 141
     if(R_FINITE(x[i])) {
139
-      xpos = (x[i] - *xlow) / xdelta;
142
+      xpos = (x[i] - xlow) / xdelta;
140 143
       ix = (int)floor(xpos);
141 144
       fx = xpos - ix;
142 145
       if(ixmin <= ix && ix <= ixmax) {
... ...
@@ -151,8 +154,8 @@ static void unweighted_massdist(double *x, int *nx, double *xlow, double *xhigh,
151 154
       }
152 155
     }
153 156
   }
154
-  for(i=0; i < *ny; i++)
155
-    y[i] *= (1.0/(double)(*nx));
157
+  for(i=0; i < ny; i++)
158
+    y[i] *= (1.0/(double)(nx));
156 159
 }
157 160
 
158 161
 
... ...
@@ -628,36 +631,37 @@ static double IQR(double *x, int length);
628 631
 
629 632
 /**********************************************************************
630 633
  **
631
- ** void KernelDensity(double *x, int *nxxx, double *output, double *xords, double *weights)
634
+ ** void KernelDensity(double *x, size_t nxxx,  double *weights, double *output, double *output_x, size_t nout, int kernel_fn, int bandwidth_fn, double bandwidth_adj)
632 635
  **
633 636
  ** double *x - data vector
634
- ** int *nxxx - length of x
637
+ ** size_t nxxx - length of x
638
+ ** double *weights - a weight for each observation in x
635 639
  ** double *output - place to output density values
636
- ** double *xords - x coordinates corresponding to output
637
- ** double *weights - a weight for each item of *x should be of length *nxxx
638
- ** int *nout - length of output should be a power of two, preferably 512 or above
639
- **
640
- ** 
640
+ ** double *output_x - x coordinates corresponding to output
641
+ ** size_t nout - length of output should be a power of two, preferably 512 or above
642
+ ** int kernel_fn - which kernel function to use (see above for integer code mapping)
643
+ ** int bandwidth_fn - which bandwidth function to use (see above for integer code mapping)
644
+ ** double bandwidth_adj - adjustment factor for bandwidth
641 645
  **
642 646
  **********************************************************************/
643 647
 
644
-void KernelDensity(double *x, int *nxxx, double *weights, double *output, double *output_x, int *nout, int *kernel_fn, int *bandwidth_fn, double *bandwidth_adj){
648
+void KernelDensity(double *x, size_t nxxx, double *weights, double *output, double *output_x, size_t nout, int kernel_fn, int bandwidth_fn, double bandwidth_adj){
645 649
 
646
-  int nx = *nxxx;
650
+  size_t nx = nxxx;
647 651
 
648
-  int nuser = *nout;
649
-  int n;  /* = *nout;  */
650
-  int n2;  /* == 2*n;  */
651
-  int i;
652
+  size_t nuser = nout;
653
+  size_t n;  /* = *nout;  */
654
+  size_t n2;  /* == 2*n;  */
655
+  size_t i;
652 656
   double low, high, iqr, bw, to, from;
653 657
   double *kords;  /*  = Calloc(2*n,double);*/
654 658
   double *buffer;  /*  = Calloc(nx,double);*/
655 659
   double *y;  /*   = Calloc(2*n,double);*/
656 660
   double *xords;  /*    = Calloc(n,double);*/
657 661
 
658
-  int kern_fn=*kernel_fn;
659
-  int bw_fn=*bandwidth_fn;
660
-  double bw_adj = *bandwidth_adj;
662
+  int kern_fn=kernel_fn;
663
+  int bw_fn=bandwidth_fn;
664
+  double bw_adj = bandwidth_adj;
661 665
 	
662 666
   n = (int)pow(2.0,ceil(log2(nuser))); 
663 667
 
... ...
@@ -701,7 +705,7 @@ void KernelDensity(double *x, int *nxxx, double *weights, double *output, double
701 705
   
702 706
   kernelize(kords, 2*n,bw,kern_fn);
703 707
 
704
-  weighted_massdist(x, &nx, weights, &low, &high, y, &n);
708
+  weighted_massdist(x, nx, weights, low, high, y, n);
705 709
 
706 710
   fft_density_convolve(y,kords,n2);
707 711
   to = high - 4*bw;  /* corrections to get on correct output range */
... ...
@@ -781,25 +785,25 @@ static double IQR(double *x, int length){
781 785
 
782 786
 /**********************************************************************
783 787
  **
784
- ** void KernelDensity_lowmem(double *x, int *nxxx, double *output, double *xords, double *weights)
788
+ ** void KernelDensity_lowmem(double *x, int nxxx, double *output,  double *output_x, size_t nout)
785 789
  **
786 790
  ** double *x - data vector (note order will be changed on output)
787
- ** int *nxxx - length of x
791
+ ** size_t nxxx - length of x
788 792
  ** double *output - place to output density values
789
- ** double *xords - x coordinates corresponding to output
790
- ** double *weights - a weight for each item of *x should be of length *nxxx
791
- ** int *nout - length of output should be a power of two, preferably 512 or above
793
+ ** double *output_x - x coordinates corresponding to output
794
+ ** size_t nout - length of output should be a power of two, preferably 512 or above
792 795
  **
793 796
  ** 
794 797
  **********************************************************************/
795 798
 
796
-void KernelDensity_lowmem(double *x, int *nxxx, double *output, double *output_x, int *nout){
799
+void KernelDensity_lowmem(double *x, size_t nxxx, double *output, double *output_x, size_t nout){
797 800
 
798
-  int nx = *nxxx;
801
+  size_t nx = nxxx;
802
+
803
+  size_t n = nout;
804
+  size_t n2= 2*n;
805
+  size_t i;
799 806
 
800
-  int n = *nout;
801
-  int n2= 2*n;
802
-  int i;
803 807
   double low, high,iqr,bw,from,to;
804 808
   double *kords = Calloc(2*n,double);
805 809
   double *buffer = x; 
... ...
@@ -832,7 +836,7 @@ void KernelDensity_lowmem(double *x, int *nxxx, double *output, double *output_x
832 836
 
833 837
   kernelize(kords, 2*n,bw,2);
834 838
 
835
-  unweighted_massdist(x, &nx, &low, &high, y, &n);
839
+  unweighted_massdist(x, nx, low, high, y, n);
836 840
 
837 841
   fft_density_convolve(y,kords,n2);
838 842
 
... ...
@@ -1,7 +1,7 @@
1 1
 #ifndef WEIGHTEDKERNELDENSITY_H
2 2
 #define WEIGHTEDKERNELDENSITY_H
3 3
 
4
-void KernelDensity(double *x, int *nxxx, double *weights, double *output, double *output_x, int *nout, int *kernel_fn, int *bandwidth_fn, double *bandwidth_adj);
5
-void KernelDensity_lowmem(double *x, int *nxxx, double *output, double *output_x, int *nout);
4
+void KernelDensity(double *x, size_t nxxx, double *weights, double *output, double *output_x, size_t nout, int kernel_fn, int bandwidth_fn, double bandwidth_adj);
5
+void KernelDensity_lowmem(double *x, size_t nxxx, double *output, double *output_x, size_t nout);
6 6
 
7 7
 #endif