git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/preprocessCore@25783 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -44,7 +44,7 @@ normalize.quantiles <- function(x,copy=TRUE){ |
44 | 44 |
|
45 | 45 |
#matrix(.C("qnorm_c", as.double(as.vector(x)), as.integer(rows), as.integer(cols))[[1]], rows, cols) |
46 | 46 |
|
47 |
- .Call("R_qnorm_c",x,copy, PACKAGE="preprocessCore"); |
|
47 |
+ .Call("R_qnorm_c_improvedties",x,copy, PACKAGE="preprocessCore"); |
|
48 | 48 |
} |
49 | 49 |
|
50 | 50 |
|
... | ... |
@@ -34,6 +34,9 @@ |
34 | 34 |
most different from all the other means, \bold{both} removes first |
35 | 35 |
extreme variance and then an extreme mean. The option \bold{none} does |
36 | 36 |
not remove any chips, but will assign equal weights to all chips. |
37 |
+ |
|
38 |
+ Note that this function does not handle missing values (ie |
|
39 |
+ NA). Unexpected results might occur in this situation. |
|
37 | 40 |
|
38 | 41 |
} |
39 | 42 |
\note{This function is still experimental.} |
... | ... |
@@ -54,6 +54,7 @@ |
54 | 54 |
** Nov 13, 2006 - remove median code |
55 | 55 |
** May 20, 2007 - move to preprocessCore. clean up code. |
56 | 56 |
** May 26, 2007 - fix memory leak in qnorm_c_determine_target |
57 |
+ ** Jul 12, 2007 - improved ties handling (fixes off by one error) |
|
57 | 58 |
** |
58 | 59 |
***********************************************************/ |
59 | 60 |
|
... | ... |
@@ -333,7 +334,7 @@ static double med_abs(double *x, int length){ |
333 | 334 |
/***************************************************************************************************** |
334 | 335 |
***************************************************************************************************** |
335 | 336 |
** |
336 |
- ** The following block implements the standard quantile normalization function |
|
337 |
+ ** The following block implements the standard quantile normalization function (aka "classic") |
|
337 | 338 |
** |
338 | 339 |
** |
339 | 340 |
***************************************************************************************************** |
... | ... |
@@ -348,6 +349,8 @@ static double med_abs(double *x, int length){ |
348 | 349 |
** |
349 | 350 |
** returns 1 if there is a problem, 0 otherwise |
350 | 351 |
** |
352 |
+ ** Note that this function does not handle missing data (ie NA) |
|
353 |
+ ** |
|
351 | 354 |
********************************************************/ |
352 | 355 |
|
353 | 356 |
int qnorm_c(double *data, int *rows, int *cols){ |
... | ... |
@@ -474,6 +477,8 @@ SEXP R_qnorm_c(SEXP X, SEXP copy){ |
474 | 477 |
** |
475 | 478 |
** This function implements the "robust" quantile normalizer |
476 | 479 |
** |
480 |
+ ** Note that this function does not handle NA values. |
|
481 |
+ ** |
|
477 | 482 |
********************************************************/ |
478 | 483 |
|
479 | 484 |
int qnorm_robust_c(double *data,double *weights, int *rows, int *cols, int *use_median, int *use_log2, int *weight_scheme){ |
... | ... |
@@ -1596,3 +1601,123 @@ SEXP R_qnorm_within_blocks(SEXP X,SEXP blocks,SEXP copy){ |
1596 | 1601 |
|
1597 | 1602 |
} |
1598 | 1603 |
|
1604 |
+ |
|
1605 |
+ |
|
1606 |
+ |
|
1607 |
+ |
|
1608 |
+ |
|
1609 |
+ |
|
1610 |
+/********************************************************* |
|
1611 |
+ ** |
|
1612 |
+ ** void qnorm_c_improved(double *data, int *rows, int *cols) |
|
1613 |
+ ** |
|
1614 |
+ ** this is the function that actually implements the |
|
1615 |
+ ** quantile normalization algorithm. It is called from R. |
|
1616 |
+ ** |
|
1617 |
+ ** returns 1 if there is a problem, 0 otherwise |
|
1618 |
+ ** |
|
1619 |
+ ** Note that this function does not handle missing data (ie NA) |
|
1620 |
+ ** |
|
1621 |
+ ********************************************************/ |
|
1622 |
+ |
|
1623 |
+int qnorm_c_improvedties(double *data, int *rows, int *cols){ |
|
1624 |
+ int i,j,ind; |
|
1625 |
+ dataitem **dimat; |
|
1626 |
+ /* double sum; */ |
|
1627 |
+ double *row_mean = (double *)Calloc((*rows),double); |
|
1628 |
+ double *datvec; /* = (double *)Calloc(*cols,double); */ |
|
1629 |
+ double *ranks = (double *)Calloc((*rows),double); |
|
1630 |
+ |
|
1631 |
+ datvec = (double *)Calloc(*rows,double); |
|
1632 |
+ |
|
1633 |
+ for (i =0; i < *rows; i++){ |
|
1634 |
+ row_mean[i] = 0.0; |
|
1635 |
+ } |
|
1636 |
+ |
|
1637 |
+ /* first find the normalizing distribution */ |
|
1638 |
+ for (j = 0; j < *cols; j++){ |
|
1639 |
+ for (i =0; i < *rows; i++){ |
|
1640 |
+ datvec[i] = data[j*(*rows) + i]; |
|
1641 |
+ } |
|
1642 |
+ qsort(datvec,*rows,sizeof(double),(int(*)(const void*, const void*))sort_double); |
|
1643 |
+ for (i =0; i < *rows; i++){ |
|
1644 |
+ row_mean[i] += datvec[i]/((double)*cols); |
|
1645 |
+ } |
|
1646 |
+ } |
|
1647 |
+ |
|
1648 |
+ /* now assign back distribution */ |
|
1649 |
+ dimat = (dataitem **)Calloc(1,dataitem *); |
|
1650 |
+ dimat[0] = (dataitem *)Calloc(*rows,dataitem); |
|
1651 |
+ |
|
1652 |
+ for (j = 0; j < *cols; j++){ |
|
1653 |
+ for (i =0; i < *rows; i++){ |
|
1654 |
+ dimat[0][i].data = data[j*(*rows) + i]; |
|
1655 |
+ dimat[0][i].rank = i; |
|
1656 |
+ } |
|
1657 |
+ qsort(dimat[0],*rows,sizeof(dataitem),sort_fn); |
|
1658 |
+ get_ranks(ranks,dimat[0],*rows); |
|
1659 |
+ for (i =0; i < *rows; i++){ |
|
1660 |
+ ind = dimat[0][i].rank; |
|
1661 |
+ if (ranks[i] - floor(ranks[i]) > 0.4){ |
|
1662 |
+ data[j*(*rows) +ind] = 0.5*(row_mean[(int)floor(ranks[i])-1] + row_mean[(int)floor(ranks[i])]); |
|
1663 |
+ } else { |
|
1664 |
+ data[j*(*rows) +ind] = row_mean[(int)floor(ranks[i])-1]; |
|
1665 |
+ } |
|
1666 |
+ } |
|
1667 |
+ } |
|
1668 |
+ |
|
1669 |
+ Free(ranks); |
|
1670 |
+ Free(datvec); |
|
1671 |
+ Free(dimat[0]); |
|
1672 |
+ |
|
1673 |
+ Free(dimat); |
|
1674 |
+ Free(row_mean); |
|
1675 |
+ return 0; |
|
1676 |
+} |
|
1677 |
+ |
|
1678 |
+ |
|
1679 |
+ |
|
1680 |
+ |
|
1681 |
+/********************************************************* |
|
1682 |
+ ** |
|
1683 |
+ ** SEXP R_qnorm_c(SEXP X) |
|
1684 |
+ ** |
|
1685 |
+ ** SEXP X - a matrix |
|
1686 |
+ ** SEXP copy - a flag if TRUE then make copy |
|
1687 |
+ ** before normalizing, if FALSE work in place |
|
1688 |
+ ** note that this can be dangerous since |
|
1689 |
+ ** it will change the original matrix. |
|
1690 |
+ ** |
|
1691 |
+ ** returns a quantile normalized matrix. |
|
1692 |
+ ** |
|
1693 |
+ ** This is a .Call() interface for quantile normalization |
|
1694 |
+ ** |
|
1695 |
+ *********************************************************/ |
|
1696 |
+ |
|
1697 |
+SEXP R_qnorm_c_improvedties(SEXP X, SEXP copy){ |
|
1698 |
+ |
|
1699 |
+ SEXP Xcopy,dim1; |
|
1700 |
+ double *Xptr; |
|
1701 |
+ int rows,cols; |
|
1702 |
+ |
|
1703 |
+ PROTECT(dim1 = getAttrib(X,R_DimSymbol)); |
|
1704 |
+ rows = INTEGER(dim1)[0]; |
|
1705 |
+ cols = INTEGER(dim1)[1]; |
|
1706 |
+ if (asInteger(copy)){ |
|
1707 |
+ PROTECT(Xcopy = allocMatrix(REALSXP,rows,cols)); |
|
1708 |
+ copyMatrix(Xcopy,X,0); |
|
1709 |
+ } else { |
|
1710 |
+ Xcopy = X; |
|
1711 |
+ } |
|
1712 |
+ Xptr = NUMERIC_POINTER(AS_NUMERIC(Xcopy)); |
|
1713 |
+ |
|
1714 |
+ qnorm_c_improvedties(Xptr, &rows, &cols); |
|
1715 |
+ if (asInteger(copy)){ |
|
1716 |
+ UNPROTECT(2); |
|
1717 |
+ } else { |
|
1718 |
+ UNPROTECT(1); |
|
1719 |
+ } |
|
1720 |
+ return Xcopy; |
|
1721 |
+} |
|
1722 |
+ |
|
1723 |
+ |