## Copyright 2010 Laurent Jacob, Pierre Neuvial and Sandrine Dudoit.

## This file is part of DEGraph.

## DEGraph is free software: you can redistribute it and/or modify
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## DEGraph is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with DEGraph.  If not, see <http://www.gnu.org/licenses/>.

#########################################################################/**
## @RdocFunction AN.test
##
## @title "Performs the Adaptive Neyman test of Fan and Lin (1998)"
##
## \description{
##  @get "title".
## }
##
## @synopsis
##
## \arguments{
##   \item{X1}{A n1 x p @matrix, observed data for class 1: p variables, n1
##     observations.}
##   \item{X2}{A n2 x p @matrix, observed data for class 2: p variables, n2
##     observations.}
##   \item{candK}{A @vector, candidate values for the true number of Fourier
##     components.}
##   \item{na.rm}{A @logical value indicating whether variables with @NA in
##     at least one of the n1 + n2 observations should be discarder before
##     the test is performed.}
## }
##
## \value{
##  A @list with class "htest" containing the following components:
##  \describe{
##    \item{statistic}{A @numeric value, the test statistic.}
##    \item{p.value}{A @numeric value, the corresponding p-value.}
##    \item{kstar}{A @numeric value, the estimated true number of Fourier
##      components.}
##   }
## }
##
## @author
##
## \seealso{
##   @see "BS.test"
##   @see "graph.T2.test"
##   @see "hyper.test"
## }
##
## @examples "../incl/tests.Rex"
##
##*/########################################################################

AN.test <- function(X1, X2, candK=1:ncol(X1), na.rm=FALSE) {
if (na.rm) {
na1 <- apply(X1, 2, FUN=function(x) sum(is.na(x)))
na2 <- apply(X2, 2, FUN=function(x) sum(is.na(x)))
idxs <- which((na1==0) & (na2==0))
X1 <- X1[, idxs]
X2 <- X2[, idxs]
}
p <- ncol(X1)
n1 <- nrow(X1)
n2 <- nrow(X2)
Tstar <- -Inf
kstar <- NA
for (kk in candK) {
var1 <- mean(diag(var(X1[, 1:kk, drop=FALSE])))
var2 <- mean(diag(var(X2[, 1:kk, drop=FALSE])))
X <- colMeans(X1[, 1:kk, drop=FALSE]) - colMeans(X2[, 1:kk, drop=FALSE])
X <- X/sqrt(var1/n1 + var2/n2)
tmp <- sum(X^2 - 1)/sqrt(2*kk)
if (tmp > Tstar) {
Tstar <- tmp
kstar <- kk
}
}
T <- sqrt(2*log(log(p)))*Tstar - (2*log(log(p)) + log(log(log(p)))/2 - log(4*pi)/2)
p <- 1-exp(-exp(-T)) ## Gumbel cdf
res <- list(statistic=T,  p.value=p, kstar=kstar)
class(res) <- "htest"
res
}

############################################################################
## HISTORY
## 2010-09-23