git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/preprocessCore@25822 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -44,7 +44,8 @@ normalize.quantiles <- function(x,copy=TRUE){ |
44 | 44 |
|
45 | 45 |
#matrix(.C("qnorm_c", as.double(as.vector(x)), as.integer(rows), as.integer(cols))[[1]], rows, cols) |
46 | 46 |
|
47 |
- .Call("R_qnorm_c",x,copy, PACKAGE="preprocessCore"); |
|
47 |
+## .Call("R_qnorm_c",x,copy, PACKAGE="preprocessCore"); |
|
48 |
+ .Call("R_qnorm_c_handleNA",x,copy, PACKAGE="preprocessCore"); |
|
48 | 49 |
} |
49 | 50 |
|
50 | 51 |
|
... | ... |
@@ -12,14 +12,19 @@ |
12 | 12 |
\item{x}{A matrix of intensities where each column corresponds to a |
13 | 13 |
chip and each row is a probe.} |
14 | 14 |
\item{copy}{Make a copy of matrix before normalizing. Usually safer to |
15 |
- work with a copy} |
|
15 |
+ work with a copy, but in certain situations not making a copy of the |
|
16 |
+ matrix, but instead normalizing it in place will be more memory friendly.} |
|
16 | 17 |
} |
17 | 18 |
\details{This method is based upon the concept of a quantile-quantile |
18 | 19 |
plot extended to n dimensions. No special allowances are made for |
19 | 20 |
outliers. If you make use of quantile normalization |
20 | 21 |
please cite Bolstad et al, Bioinformatics (2003). |
21 | 22 |
|
22 |
- Note that this function does not handle missing (ie NA) values. |
|
23 |
+ This functions will handle missing data (ie NA values), based on the |
|
24 |
+ assumption that the data is missing at random. |
|
25 |
+ |
|
26 |
+ Note that the current implementation optimizes for better memory usage |
|
27 |
+ at the cost of some additional run-time. |
|
23 | 28 |
|
24 | 29 |
} |
25 | 30 |
|
... | ... |
@@ -28,6 +28,10 @@ |
28 | 28 |
\code{\link[affy]{rma}} or \code{\link[affy]{expresso}} |
29 | 29 |
please cite Bolstad et al, Bioinformatics (2003). |
30 | 30 |
|
31 |
+ |
|
32 |
+ These functions will handle missing data (ie NA values), based on the |
|
33 |
+ assumption that the data is missing at random. |
|
34 |
+ |
|
31 | 35 |
} |
32 | 36 |
|
33 | 37 |
\value{ |
... | ... |
@@ -54,10 +54,22 @@ |
54 | 54 |
** Nov 13, 2006 - remove median code |
55 | 55 |
** May 20, 2007 - move to preprocessCore. clean up code. |
56 | 56 |
** May 26, 2007 - fix memory leak in qnorm_c_determine_target |
57 |
- ** Jul 12, 2007 - improved ties handling (fixes off by "half" error which affects odd numbers of ties) |
|
57 |
+ ** Jul 12, 2007 - improved ties handling (fixes off by "half" error which affects even numbers of ties) |
|
58 |
+ ** Jul 14, 2007 - add NA handling to qnorm_c_using_target and qnorm_c_determine_target |
|
58 | 59 |
** |
59 | 60 |
***********************************************************/ |
60 | 61 |
|
62 |
+/***************************************************************************************************** |
|
63 |
+ ***************************************************************************************************** |
|
64 |
+ ** |
|
65 |
+ ** GENERAL NOTE: Many of the functions take pointers for arguements that are essentially just |
|
66 |
+ ** int's. This is mostly legacy for when the functions were called via .C() in R rather |
|
67 |
+ ** than via the .Call() interface. |
|
68 |
+ ** |
|
69 |
+ ***************************************************************************************************** |
|
70 |
+ *****************************************************************************************************/ |
|
71 |
+ |
|
72 |
+ |
|
61 | 73 |
#include <stdio.h> |
62 | 74 |
#include <stdlib.h> |
63 | 75 |
#include <math.h> |
... | ... |
@@ -1179,7 +1191,6 @@ SEXP R_qnorm_robust_weights(SEXP X, SEXP remove_extreme, SEXP n_remove){ |
1179 | 1191 |
** normalized to) |
1180 | 1192 |
** int *targetrows - length of target distribution vector |
1181 | 1193 |
** |
1182 |
- ** Note that it is assumed that there is no NA or Inf type values in the vectors. (ie no missing data) |
|
1183 | 1194 |
** |
1184 | 1195 |
** if targetrows == rows then the standard methodology is used. |
1185 | 1196 |
** |
... | ... |
@@ -1591,6 +1602,77 @@ SEXP R_qnorm_determine_target(SEXP X, SEXP targetlength){ |
1591 | 1602 |
|
1592 | 1603 |
|
1593 | 1604 |
|
1605 |
+ |
|
1606 |
+/********************************************************* |
|
1607 |
+ ** |
|
1608 |
+ ** void qnorm_c_handleNA(double *data, int *rows, int *cols) |
|
1609 |
+ ** |
|
1610 |
+ ** this is the function that actually implements the |
|
1611 |
+ ** quantile normalization algorithm. It is called from R. |
|
1612 |
+ ** |
|
1613 |
+ ** returns 1 if there is a problem, 0 otherwise |
|
1614 |
+ ** |
|
1615 |
+ ** Note that this function does not handle missing data (ie NA) |
|
1616 |
+ ** |
|
1617 |
+ ********************************************************/ |
|
1618 |
+ |
|
1619 |
+ |
|
1620 |
+void qnorm_c_handleNA(double *data, int *rows, int *cols){ |
|
1621 |
+ |
|
1622 |
+ double *target = Calloc(*rows,double); |
|
1623 |
+ |
|
1624 |
+ qnorm_c_determine_target(data, rows, cols, target, rows); |
|
1625 |
+ qnorm_c_using_target(data, rows, cols, target, rows); |
|
1626 |
+ |
|
1627 |
+ Free(target); |
|
1628 |
+ |
|
1629 |
+} |
|
1630 |
+ |
|
1631 |
+ |
|
1632 |
+/********************************************************* |
|
1633 |
+ ** |
|
1634 |
+ ** SEXP R_qnorm_c_handleNA(SEXP X) |
|
1635 |
+ ** |
|
1636 |
+ ** SEXP X - a matrix |
|
1637 |
+ ** SEXP copy - a flag if TRUE then make copy |
|
1638 |
+ ** before normalizing, if FALSE work in place |
|
1639 |
+ ** note that this can be dangerous since |
|
1640 |
+ ** it will change the original matrix. |
|
1641 |
+ ** |
|
1642 |
+ ** returns a quantile normalized matrix. |
|
1643 |
+ ** |
|
1644 |
+ ** This is a .Call() interface for quantile normalization |
|
1645 |
+ ** |
|
1646 |
+ *********************************************************/ |
|
1647 |
+ |
|
1648 |
+SEXP R_qnorm_c_handleNA(SEXP X, SEXP copy){ |
|
1649 |
+ |
|
1650 |
+ SEXP Xcopy,dim1; |
|
1651 |
+ double *Xptr; |
|
1652 |
+ int rows,cols; |
|
1653 |
+ |
|
1654 |
+ PROTECT(dim1 = getAttrib(X,R_DimSymbol)); |
|
1655 |
+ rows = INTEGER(dim1)[0]; |
|
1656 |
+ cols = INTEGER(dim1)[1]; |
|
1657 |
+ if (asInteger(copy)){ |
|
1658 |
+ PROTECT(Xcopy = allocMatrix(REALSXP,rows,cols)); |
|
1659 |
+ copyMatrix(Xcopy,X,0); |
|
1660 |
+ } else { |
|
1661 |
+ Xcopy = X; |
|
1662 |
+ } |
|
1663 |
+ Xptr = NUMERIC_POINTER(AS_NUMERIC(Xcopy)); |
|
1664 |
+ |
|
1665 |
+ qnorm_c_handleNA(Xptr, &rows, &cols); |
|
1666 |
+ if (asInteger(copy)){ |
|
1667 |
+ UNPROTECT(2); |
|
1668 |
+ } else { |
|
1669 |
+ UNPROTECT(1); |
|
1670 |
+ } |
|
1671 |
+ return Xcopy; |
|
1672 |
+} |
|
1673 |
+ |
|
1674 |
+ |
|
1675 |
+ |
|
1594 | 1676 |
/***************************************************************************************************** |
1595 | 1677 |
***************************************************************************************************** |
1596 | 1678 |
** |