#include<stdio.h>
#include<math.h>
#include "survS.h"
#include "survproto.h"
#include<R_ext/Rdynload.h>
#include <R.h>
#include <Rinternals.h>

extern"C" {
    void coxmat(double *regmat, int *ncolmat, int *nrowmat, 
                double *reg,double *zscores,double *coefs,
                Sint   *maxiter,   Sint   *nusedx,    Sint   *nvarx, 
                double *time,      Sint   *status, 
                double *offset,	double *weights,   Sint   *strata,
                double *means,     double *beta,      double *u, 
                double *imat2,     double loglik[2],  Sint   *flag, 
                double *work,	double *eps,       double *tol_chol,
                double *sctest,double *sctest2,double *sctest3){
        // int reg[*nrowmat];
        int i=0;
        int j=0;
        double sclback=*sctest;
        Sint maxlback=*maxiter;
        //Sint stratalback;
        double betalback=1;
        
        for(j=0;j< *ncolmat;j++){
            
            //	reg erstellen
            for(i=0;i< *nrowmat;i++)
                reg[i]=regmat[ *nrowmat * j+i];
            
            
            //kk(reg);
            try{
                coxfit2(maxiter, nusedx, nvarx, 
                        time, status, reg, 
                        offset, weights, strata,
                        means, beta, u, 
                        imat2, loglik, flag, 
                        work, eps, tol_chol,
                        sctest,sctest2,sctest3);
                zscores[j]=*sctest3;
                coefs[j]=*sctest2;
                *sctest=sclback;
                *maxiter=maxlback;
                *beta=betalback;
            }
            
            catch(...){
                zscores[j]=-1;
                coefs[j]=0;
            }
            
            
        }
        
        
        
        return;
    }
    
    
    
    
    
    
    void kk(double *mm){
        
        mm[1]=123;
        
        return;
    }
    
    
    static const
    R_CMethodDef cMethods[] = {
        {"coxmat", (DL_FUNC) &coxmat, 26},
        NULL
    };
    
    
    void R_init_coxformatrices(DllInfo *info)
    {
        R_registerRoutines(info, cMethods, NULL, NULL, NULL);
    }
    
    
}


///this stuff is adapted from the survival package
/*
 ** here is a cox regression program, written in c
 **     uses Efron's approximation for ties
 **  the input parameters are
 **
 **       maxiter      :number of iterations
 **       nused        :number of people
 **       nvar         :number of covariates
 **       time(n)      :time of event or censoring for person i
 **       status(n)    :status for the ith person    1=dead , 0=censored
 **       covar(nv,n)  :covariates for person i.
 **                        Note that S sends this in column major order.
 **       strata(n)    :marks the strata.  Will be 1 if this person is the
 **                       last one in a strata.  If there are no strata, the
 **                       vector can be identically zero, since the nth person's
 **                       value is always assumed to be = to 1.
 **       offset(n)    :offset for the linear predictor
 **       weights(n)   :case weights
 **       eps          :tolerance for convergence.  Iteration continues until
 **                       the percent change in loglikelihood is <= eps.
 **       chol_tol     : tolerance for the Cholesky decompostion
 **       sctest       : on input contains the method 0=Breslow, 1=Efron
 **
 **  returned parameters
 **       means(nv)    : vector of column means of X
 **       beta(nv)     :the vector of answers (at start contains initial est)
 **       u(nv)        :score vector
 **       imat(nv,nv)  :the variance matrix at beta=final, also a ragged array
 **                      if flag<0, imat is undefined upon return
 **       loglik(2)    :loglik at beta=initial values, at beta=final
 **       sctest       :the score test at beta=initial
 **       flag         :success flag  1000  did not converge
 **                                   1 to nvar: rank of the solution
 **       maxiter      :actual number of iterations used
 **
 **  work arrays
 **       mark(n)
 **       wtave(n)
 **       a(nvar), a2(nvar)
 **       cmat(nvar,nvar)       ragged array
 **       cmat2(nvar,nvar)
 **       newbeta(nvar)         always contains the "next iteration"
 **
 **  the work arrays are passed as a single
 **    vector of storage, and then broken out.
 **
 **  calls functions:  cholesky2, chsolve2, chinv2
 **
 **  the data must be sorted by ascending time within strata
 */


void coxfit2(Sint   *maxiter,   Sint   *nusedx,    Sint   *nvarx, 
             double *time,      Sint   *status,    double *covar2, 
             double *offset,	double *weights,   Sint   *strata,
             double *means,     double *beta,      double *u, 
             double *imat2,     double loglik[2],  Sint   *flag, 
             double *work,	double *eps,       double *tol_chol,
             double *sctest, double *sctest2,double *sctest3){
    int i,j,k, person;
    int     iter;
    int     nused, nvar;
    
    double **covar, **cmat, **imat;  /*ragged array versions*/
double *mark, *wtave;
double *a, *newbeta;
double *a2, **cmat2;
double  denom=0, zbeta, risk;
double  temp, temp2;
double  ndead;
double  newlk=0;
double  d2, efron_wt;
int     halving;    /*are we doing step halving at the moment? */
double     method;

nused = *nusedx;
nvar  = *nvarx;
method= *sctest;
/*
 **  Set up the ragged arrays
 */
covar= dmatrix(covar2, nused, nvar);
imat = dmatrix(imat2,  nvar, nvar);
cmat = dmatrix(work,   nvar, nvar);
cmat2= dmatrix(work+nvar*nvar, nvar, nvar);
a = work + 2*nvar*nvar;
newbeta = a + nvar;
a2 = newbeta + nvar;
mark = a2 + nvar;
wtave= mark + nused;

/*
 **   Mark(i) contains the number of tied deaths at this point,
 **    for the first person of several tied times. It is zero for
 **    the second and etc of a group of tied times.
 **   Wtave contains the average weight for the deaths
 */
temp=0;
j=0;
for (i=nused-1; i>0; i--) {
    if ((time[i]==time[i-1]) & (strata[i-1] != 1)) {
        j += status[i];
        temp += status[i]* weights[i];
        mark[i]=0;
    }
    else  {
        mark[i] = j + status[i];
        if (mark[i] >0) wtave[i]= (temp+ status[i]*weights[i])/ mark[i];
        temp=0; j=0;
    }
}
mark[0]  = j + status[0];
if (mark[0]>0) wtave[0] = (temp +status[0]*weights[0])/ mark[0];

/*
 ** Subtract the mean from each covar, as this makes the regression
 **  much more stable
 */
for (i=0; i<nvar; i++) {
    temp=0;
    for (person=0; person<nused; person++) temp += covar[i][person];
    temp /= nused;
    means[i] = temp;
    for (person=0; person<nused; person++) covar[i][person] -=temp;
}

/*
 ** do the initial iteration step
 */
strata[nused-1] =1;
loglik[1] =0;
for (i=0; i<nvar; i++) {
    u[i] =0;
    for (j=0; j<nvar; j++)
        imat[i][j] =0 ;
}

efron_wt =0;
for (person=nused-1; person>=0; person--) {
    if (strata[person] == 1) {
        denom = 0;
        for (i=0; i<nvar; i++) {
            a[i] = 0;
            a2[i]=0 ;
            for (j=0; j<nvar; j++) {
                cmat[i][j] = 0;
                cmat2[i][j]= 0;
            }
        }
    }
    
    zbeta = offset[person];    /* form the term beta*z   (vector mult) */
for (i=0; i<nvar; i++)
    zbeta += beta[i]*covar[i][person];
zbeta = coxsafe(zbeta);
risk = exp(zbeta) * weights[person];

denom += risk;
efron_wt += status[person] * risk;  /*sum(denom) for tied deaths*/

for (i=0; i<nvar; i++) {
    a[i] += risk*covar[i][person];
    for (j=0; j<=i; j++)
        cmat[i][j] += risk*covar[i][person]*covar[j][person];
}

if (status[person]==1) {
    loglik[1] += weights[person]*zbeta;
    for (i=0; i<nvar; i++) {
        u[i] += weights[person]*covar[i][person];
        a2[i] +=  risk*covar[i][person];
        for (j=0; j<=i; j++)
            cmat2[i][j] += risk*covar[i][person]*covar[j][person];
    }
}
if (mark[person] >0) {  /* once per unique death time */
/*
 ** Trick: when 'method==0' then temp=0, giving Breslow's method
 */
ndead = mark[person];
    for (k=0; k<ndead; k++) {
        temp = (double)k * method / ndead;
        d2= denom - temp*efron_wt;
        loglik[1] -= wtave[person] * log(d2);
        for (i=0; i<nvar; i++) {
            temp2 = (a[i] - temp*a2[i])/ d2;
            u[i] -= wtave[person] *temp2;
            for (j=0; j<=i; j++)
                imat[j][i] +=  wtave[person]*(
                    (cmat[i][j] - temp*cmat2[i][j]) /d2 -
                    temp2*(a[j]-temp*a2[j])/d2);
        }
    }
    efron_wt =0;
    for (i=0; i<nvar; i++) {
        a2[i]=0;
        for (j=0; j<nvar; j++)  cmat2[i][j]=0;
    }
}
}   /* end  of accumulation loop */

loglik[0] = loglik[1];   /* save the loglik for iteration zero  */

/* am I done?
 **   update the betas and test for convergence
 */
for (i=0; i<nvar; i++) /*use 'a' as a temp to save u0, for the score test*/
a[i] = u[i];

*flag= cholesky2(imat, nvar, *tol_chol);
chsolve2(imat,nvar,a);        /* a replaced by  a *inverse(i) */

*sctest=0;
for (i=0; i<nvar; i++)
    *sctest +=  u[i]*a[i];

/*
 **  Never, never complain about convergence on the first step.  That way,
 **  if someone HAS to they can force one iter at a time.
 */
for (i=0; i<nvar; i++) {
    newbeta[i] = beta[i] + a[i];
}
if (*maxiter==0) {
    chinv2(imat,nvar);
    for (i=1; i<nvar; i++)
        for (j=0; j<i; j++)  imat[i][j] = imat[j][i];
    *sctest3=beta[0]/sqrt(imat[0][0]);
    *sctest2=beta[0];
    return;   /* and we leave the old beta in peace */
}

/*
 ** here is the main loop
 */
halving =0 ;             /* =1 when in the midst of "step halving" */
for (iter=1; iter<=*maxiter; iter++) {
    newlk =0;
    for (i=0; i<nvar; i++) {
        u[i] =0;
        for (j=0; j<nvar; j++)
            imat[i][j] =0;
    }
    
    /*
     ** The data is sorted from smallest time to largest
     ** Start at the largest time, accumulating the risk set 1 by 1
     */
    for (person=nused-1; person>=0; person--) {
        if (strata[person] == 1) { /* rezero temps for each strata */
    efron_wt =0;
            denom = 0;
            for (i=0; i<nvar; i++) {
                a[i] = 0;
                a2[i]=0 ;
                for (j=0; j<nvar; j++) {
                    cmat[i][j] = 0;
                    cmat2[i][j]= 0;
                }
            }
        }
        
        zbeta = offset[person];
        for (i=0; i<nvar; i++)
            zbeta += newbeta[i]*covar[i][person];
        zbeta = coxsafe(zbeta);
        risk = exp(zbeta ) * weights[person];
        denom += risk;
        efron_wt += status[person] * risk;  /* sum(denom) for tied deaths*/
    
    for (i=0; i<nvar; i++) {
        a[i] += risk*covar[i][person];
        for (j=0; j<=i; j++)
            cmat[i][j] += risk*covar[i][person]*covar[j][person];
    }
    
    if (status[person]==1) {
        newlk += weights[person] *zbeta;
        for (i=0; i<nvar; i++) {
            u[i] += weights[person] *covar[i][person];
            a2[i] +=  risk*covar[i][person];
            for (j=0; j<=i; j++)
                cmat2[i][j] += risk*covar[i][person]*covar[j][person];
        }
    }
    
    if (mark[person] >0) {  /* once per unique death time */
    for (k=0; k<mark[person]; k++) {
        temp = (double)k* method /mark[person];
        d2= denom - temp*efron_wt;
        newlk -= wtave[person] *log(d2);
        for (i=0; i<nvar; i++) {
            temp2 = (a[i] - temp*a2[i])/ d2;
            u[i] -= wtave[person] *temp2;
            for (j=0; j<=i; j++)
                imat[j][i] +=  wtave[person] *(
                    (cmat[i][j] - temp*cmat2[i][j]) /d2 -
                    temp2*(a[j]-temp*a2[j])/d2);
        }
    }
    efron_wt =0;
        for (i=0; i<nvar; i++) {
            a2[i]=0;
            for (j=0; j<nvar; j++)  cmat2[i][j]=0;
        }
    }
    }   /* end  of accumulation loop  */
    
    /* am I done?
     **   update the betas and test for convergence
     */
    *flag = cholesky2(imat, nvar, *tol_chol);
    
    if (fabs(1-(loglik[1]/newlk))<=*eps && halving==0) { /* all done */
    loglik[1] = newlk;
        chinv2(imat, nvar);     /* invert the information matrix */
    for (i=1; i<nvar; i++)
        for (j=0; j<i; j++)  imat[i][j] = imat[j][i];
    for (i=0; i<nvar; i++)
        beta[i] = newbeta[i];
    *maxiter = iter;
    *sctest3=beta[0]/sqrt(imat[0][0]);
    *sctest2=beta[0];
    return;
    }
    
    if (iter==*maxiter) break;  /*skip the step halving calc*/
    
    if (newlk < loglik[1])   {    /*it is not converging ! */
    halving =1;
        for (i=0; i<nvar; i++)
            newbeta[i] = (newbeta[i] + beta[i]) /2; /*half of old increment */
    }
    else {
        halving=0;
        loglik[1] = newlk;
        chsolve2(imat,nvar,u);
        
        j=0;
        for (i=0; i<nvar; i++) {
            beta[i] = newbeta[i];
            newbeta[i] = newbeta[i] +  u[i];
        }
    }
}   /* return for another iteration */
    
    loglik[1] = newlk;
chinv2(imat, nvar);
for (i=1; i<nvar; i++)
    for (j=0; j<i; j++)  imat[i][j] = imat[j][i];
for (i=0; i<nvar; i++)
    beta[i] = newbeta[i];
*flag= 1000;

*sctest3=beta[0]/sqrt(imat[0][0]);
*sctest2=beta[0];
return;
}

/* $Id: cholesky2.c 11357 2009-09-04 15:22:46Z therneau $
 **
 ** subroutine to do Cholesky decompostion on a matrix: C = FDF'
 **   where F is lower triangular with 1's on the diagonal, and D is diagonal
 **
 ** arguments are:
 **     n         the size of the matrix to be factored
 **     **matrix  a ragged array containing an n by n submatrix to be factored
 **     toler     the threshold value for detecting "singularity"
 **
 **  The factorization is returned in the lower triangle, D occupies the
 **    diagonal and the upper triangle is left undisturbed.
 **    The lower triangle need not be filled in at the start.
 **
 **  Return value:  the rank of the matrix (non-negative definite), or -rank
 **     it not SPD or NND
 **
 **  If a column is deemed to be redundant, then that diagonal is set to zero.
 **
 **   Terry Therneau
 */

int cholesky2(double **matrix, int n, double toler)
{
    double temp;
    int  i,j,k;
    double eps, pivot;
    int rank;
    int nonneg;
    
    nonneg=1;
    eps =0;
    for (i=0; i<n; i++) {
        if (matrix[i][i] > eps)  eps = matrix[i][i];
        for (j=(i+1); j<n; j++)  matrix[j][i] = matrix[i][j];
    }
    eps *= toler;
    
    rank =0;
    for (i=0; i<n; i++) {
        pivot = matrix[i][i];
        if (pivot < eps) {
            matrix[i][i] =0;
            if (pivot < -8*eps) nonneg= -1;
        }
        else  {
            rank++;
            for (j=(i+1); j<n; j++) {
                temp = matrix[j][i]/pivot;
                matrix[j][i] = temp;
                matrix[j][j] -= temp*temp*pivot;
                for (k=(j+1); k<n; k++) matrix[k][j] -= temp*matrix[k][i];
            }
        }
    }
    return(rank * nonneg);
}

/*  $Id: chsolve2.c 11376 2009-12-14 22:53:57Z therneau $
 **
 ** Solve the equation Ab = y, where the cholesky decomposition of A and y
 **   are the inputs.
 **
 ** Input  **matrix, which contains the chol decomp of an n by n
 **   matrix in its lower triangle.
 **        y[n] contains the right hand side
 **
 **  y is overwriten with b
 **
 **  Terry Therneau
 */

void chsolve2(double **matrix, int n, double *y)
{
    register int i,j;
    register double temp;
    
    /*
     ** solve Fb =y
     */
    for (i=0; i<n; i++) {
        temp = y[i] ;
        for (j=0; j<i; j++)
            temp -= y[j] * matrix[i][j] ;
        y[i] = temp ;
    }
    /*
     ** solve DF'z =b
     */
    for (i=(n-1); i>=0; i--) {
        if (matrix[i][i]==0)  y[i] =0;
        else {
            temp = y[i]/matrix[i][i];
            for (j= i+1; j<n; j++)
                temp -= y[j]*matrix[j][i];
            y[i] = temp;
        }
    }
}
/* $Id: chinv2.c 11357 2009-09-04 15:22:46Z therneau $
 **
 ** matrix inversion, given the FDF' cholesky decomposition
 **
 ** input  **matrix, which contains the chol decomp of an n by n
 **   matrix in its lower triangle.
 **
 ** returned: the upper triangle + diagonal contain (FDF')^{-1}
 **            below the diagonal will be F inverse
 **
 **  Terry Therneau
 */


void chinv2(double **matrix , int n)
{
    register double temp;
    register int i,j,k;
    
    /*
     ** invert the cholesky in the lower triangle
     **   take full advantage of the cholesky's diagonal of 1's
     */
    for (i=0; i<n; i++){
        if (matrix[i][i] >0) {
            matrix[i][i] = 1/matrix[i][i];   /*this line inverts D */
    for (j= (i+1); j<n; j++) {
        matrix[j][i] = -matrix[j][i];
        for (k=0; k<i; k++)     /*sweep operator */
    matrix[j][k] += matrix[j][i]*matrix[i][k];
    }
        }
    }
    
    /*
     ** lower triangle now contains inverse of cholesky
     ** calculate F'DF (inverse of cholesky decomp process) to get inverse
     **   of original matrix
     */
    for (i=0; i<n; i++) {
        if (matrix[i][i]==0) {  /* singular row */
    for (j=0; j<i; j++) matrix[j][i]=0;
            for (j=i; j<n; j++) matrix[i][j]=0;
        }
        else {
            for (j=(i+1); j<n; j++) {
                temp = matrix[j][i]*matrix[j][j];
                if (j!=i) matrix[i][j] = temp;
                for (k=i; k<j; k++)
                    matrix[i][k] += temp*matrix[j][k];
            }
        }
    }
}


/*
 ** A very few pathologic cases can cause the Newton Raphson iteration
 **  path in coxph to generate a horrific argument to exp().  Since all these
 **  calls to exp result in (essentially) relative risks we choose a
 **  fixed value of LARGE on biological grounds: any number less than 
 **  1/(population of the earth) is essentially a zero, that is, an exponent
 **  outside the range of +-23.  
 ** A sensible numeric limit would be log(.Machine$double.xmax) which is 
 **  about 700, perhaps divided by 2 or log(n) to keep a few more bits.
 **  However, passing this down the R calling chain to the c-routine is a lot
 **  more hassle than I want to implement for this very rare case.
 **
 ** Actually, the argument does not have to get large enough to have any
 **  single exponential overflow.  In (start, stop] data we keep a running 
 **  sum of scores exp(x[i]*beta), which involves both adding subjects in and
 **  subtracting them out.  An outlier x value that enters and then leaves can
 **  erase all the digits of accuracy.  Most machines have about 16 digits of
 **  accuracy and exp(21) uses up about 9 of them, leaving enough that the
 **  routine doesn't fall on it's face.  (A user data set with outlier that
 **  got exp(54) and a overlarge first beta on the first iteration led to this 
 **  paragraph.)  When beta-hat is infinite and x well behaved, the loglik 
 **  usually converges before xbeta gets to 15, so this protection should not
 **  harm the iteration path of even edge cases; only fix those that truely
 **  go astray. 
 **
 ** The truncation turns out not to be necessary for small values, since a risk
 **  score of exp(-50) or exp(-1000) or 0 all give essentially the same effect.
 ** We only cut these off enough to avoid underflow.
 */

#define LARGE 22
#define SMALL -200

double coxsafe(double x) {
    if (x< SMALL) return(SMALL);
    if (x> LARGE) return(LARGE);
    return (x);
}
/* $Id: dmatrix.c 11357 2009-09-04 15:22:46Z therneau $
 **
 ** set up ragged arrays, with #of columns and #of rows
 */


double **dmatrix(double *array, int ncol, int nrow)
{
    S_EVALUATOR
    register int i;
    register double **pointer;
    
    pointer = (double **) ALLOC(nrow, sizeof(double *));
    for (i=0; i<nrow; i++) {
        pointer[i] = array;
        array += ncol;
    }
    return(pointer);
}