\name{RP.advance}
\alias{RP.advance}
\title{Advanced Rank Product/Rank Sum Analysis}
\description{
The function performs the Rank Product (or Rank Sum) method to
identify differentially expressed genes. It is possible to do either a
one-class or two-class analysis. It is also possible to combine data from
different studies (e.g. datasets generated by different laboratories).
}
\usage{
RP.advance(data, cl, origin, logged = TRUE, na.rm = TRUE, gene.names = NULL,
plot = FALSE, rand = NULL, calculateProduct = TRUE, MinNumOfValidPairs = NA,
RandomPairs = NA, huge = FALSE, fast = TRUE, tail.time = 0.05)
}
\arguments{
\item{data}{the data set that should be analyzed. Every row of this dataset must
correspond to a gene}
\item{cl}{a vector containing the class labels of the samples.
In the two class unpaired case, the label of a sample
is either 0 (e.g., control group) or 1 (e.g., case group).
For one class data, the label for each sample should be 1}
\item{origin}{a vector containing the origin labels of the samples. The label is
the same for samples within one lab and different for samples from different
labs.}
\item{logged}{if "TRUE" data have been previously log transformed. Otherwise it 
should be set as "FALSE"}
\item{na.rm}{if "FALSE", the NA value will not be used in
computing rank. If "TRUE" (default), the missing values will be replaced by
the genewise median of the non-missing values.
Genes with a number of missing values greater than "MinNumOfValidPairs" are still
not considered in the analysis}
\item{gene.names}{if "NULL", no gene name will be attached to the outputs,
otherwise it contains the vector of gene names}
\item{plot}{if "TRUE", plot the estimated pfp vs the rank of each gene}
\item{rand}{if specified, the random number generator will
be put in a reproducible state}
\item{calculateProduct}{if calculateProduct="TRUE"
(default) the rank product method is performed.
Otherwise the rank sum method is performed}
\item{MinNumOfValidPairs}{a parameter that indicates the minimum number
of NAs accepted per gene. If it is set to NA (default), half of
the number of replicates is used}
\item{RandomPairs}{number of random pairs generated in the function,
if set to NA (default),
the odd integer closest to the square of the number of replicates is used}
\item{huge}{if "TRUE" not all the outputs are evaluated
in order to save space}
\item{fast}{if "FALSE" the exact p-values for the Rank Sum are evaluated for
any size of the dataset.
Otherwise (default), if the size of the dataset is too big, only the p-values
that can be computed in "tail.time" minutes (starting from the tail) are
evaluated with the exact method. The others are estimated with the Gaussian
approximation. If calculateProduct="TRUE" this parameter is ignored}
\item{tail.time}{the time (default 0.05 min) dedicated to evaluate the exact
p-values for the Rank Sum. If calculateProduct="TRUE" this parameter is ignored.}
}
\value{
A summary of the results obtained by the Rank Product (or Rank Sum) method.
\item{pfp}{estimated percentage of false positive predictions (pfp), both
considering upregulated and downregulated genes}
\item{pval}{estimated p-values for each gene being up- and down-regulated}
\item{RPs/RSs}{the Rank Product (or Rank Sum) statistics evaluated per
each gene}
\item{RPrank/RSrank}{rank of the Rank Product (or Rank Sum) of each gene
in ascending order}
\item{Orirank}{ranks obtained when considering each possible pairing.
In this version of the package, this is not used to compute
Rank Product (or Rank Sum), but it is kept for backward compatibility}
\item{AveFC}{fold changes of average expressions (class1/class2).
log fold-change if data has been log transformed,
original fold change otherwise}
\item{allrank1}{fold change of class 1/class 2 under each origin.
log fold-change if data has been log transformed,
original fold change otherwise}
\item{allrank2}{fold change of class 2/class 1 under each origin.
log fold-change if data has been log transformed,
original fold change otherwise}
\item{nrep}{total number of replicates}
\item{groups}{vector of labels (as cl)}
\item{RandomPairs_ranks}{a matrix containing the
ranks evaluated for each RandomPair}
}

\references{
Breitling, R., Armengaud, P., Amtmann, A., and Herzyk, P. (2004) Rank Products: A
simple, yet powerful, new method to detect differentially regulated genes in
replicated microarray experiments, FEBS Letters, 573:83-92
}
\author{
Francesco Del Carratore,
\email{francesco.delcarratore@postgrad.manchester.ac.uk}
\cr Andris Jankevics, \email{andris.jankevics@gmail.com}
}

\seealso{
\code{\link{topGene}} \code{\link{RP}}
\code{\link{RPadvance}} \code{\link{plotRP}} 
\code{\link{RankProducts}} \code{\link{RSadvance}}
}
\examples{
# Load the data of Golub et al. (1999). data(golub)
# contains a 3051x38 gene expression
# matrix called golub, a vector of length 38 called golub.cl
# that consists of the 38 class labels,
# and a matrix called golub.gnames whose third column
# contains the gene names.
data(golub)

## For data with a single origin
# Select 7 samples and give all of them the same origin label
subset <- c(1:4,28:30)
origin <- rep(1,7)
# Identify differentially expressed genes; setting rand puts the
# random number generator in a reproducible state
RP.out <- RP.advance(golub[,subset],golub.cl[subset],
            origin,plot=FALSE,rand=123)
      
## For data from multiple origins
      
# Load the data arab in the package, which contains
# the expression of 22,081 genes
# of control and treatment group from the experiments
# independently conducted at two
# laboratories.
data(arab)
arab.origin #1 1 1 1 1 1 2 2 2 2
arab.cl #0 0 0 1 1 1 0 0 1 1
RP.adv.out <- RP.advance(arab,arab.cl,arab.origin,
                gene.names=arab.gnames,logged=TRUE,rand=123)

# Inspect the returned object and the first entries of some of its components
attributes(RP.adv.out)
head(RP.adv.out$pfp)
head(RP.adv.out$RPs)
head(RP.adv.out$AveFC)
      
     
     
# Suppose we want to check the consistency of the data
# sets generated in two different
# labs. For example, we would look for genes that were
# measured to be up-regulated in
# class 2 at lab 1, but down-regulated in class 2 at lab 2.
data(arab)
arab.cl2 <- arab.cl

# Swap the class labels of the samples from origin 2 only
arab.cl2[arab.cl==0 &arab.origin==2] <- 1

arab.cl2[arab.cl==1 &arab.origin==2] <- 0

arab.cl2
##[1] 0 0 0 1 1 1 1 1 0 0


# Look for genes differentially expressed
# between hypothetical class 1 and 2.
# calculateProduct=FALSE selects the rank sum method
# instead of the rank product method.
arab.sub=arab[1:500,] ##using subset for fast computation
arab.gnames.sub=arab.gnames[1:500]
Rsum.adv.out <- RP.advance(arab.sub,arab.cl2,arab.origin,calculateProduct
                =FALSE,logged=TRUE,gene.names=arab.gnames.sub,rand=123)

attributes(Rsum.adv.out)
}