### -----------------------------------------------------------------
### The main function for scanning the axts and get the CNEs
### Not exported!
ceScanR <- function(axts, tFilter=NULL, qFilter=NULL, qSizes=NULL, 
                   thresholds=c("49_50")){
  ## Here the returned tStart and qStart are 1-based coordinates. 
  ## Of course ends are also 1-based.
  if(!is.null(qFilter))
    if(is.null(qSizes) || !is(qSizes, "Seqinfo"))
      stop("qSizes must exist and be a Seqinfo object when qFilter exists")
  
  winSize <- as.integer(sapply(strsplit(thresholds, "_"), "[", 2))
  minScore <- as.integer(sapply(strsplit(thresholds, "_"), "[", 1))
  resFiles <- tempfile(pattern=paste(minScore, winSize, "ceScan", sep="-"), 
                       tmpdir=tempdir(), fileext="")
  ## How stupid I am...
  if(is.null(tFilter) && is.null(qFilter)){
    .Call2("myCeScan", NULL, NULL, NULL,
           NULL, NULL, NULL,
           NULL, NULL,
           as.character(seqnames(targetRanges(axts))),
           start(targetRanges(axts)), end(targetRanges(axts)),
           as.character(strand(targetRanges(axts))),
           as.character(targetSeqs(axts)),
           as.character(seqnames(queryRanges(axts))),
           start(queryRanges(axts)), end(queryRanges(axts)),
           as.character(strand(queryRanges(axts))),
           as.character(querySeqs(axts)),
           score(axts), symCount(axts), winSize, minScore,
           as.character(resFiles),
           PACKAGE="CNEr")
  }else if(is.null(tFilter) && !is.null(qFilter)){
    .Call2("myCeScan", NULL, NULL, NULL,
           as.character(seqnames(qFilter)), start(qFilter), end(qFilter),
           as.character(seqnames(qSizes)), as.integer(seqlengths(qSizes)),
           as.character(seqnames(targetRanges(axts))),
           start(targetRanges(axts)), end(targetRanges(axts)),
           as.character(strand(targetRanges(axts))),
           as.character(targetSeqs(axts)),
           as.character(seqnames(queryRanges(axts))),
           start(queryRanges(axts)), end(queryRanges(axts)),
           as.character(strand(queryRanges(axts))),
           as.character(querySeqs(axts)),
           score(axts), symCount(axts), winSize, minScore,
           as.character(resFiles),
           PACKAGE="CNEr")
  }else if(!is.null(tFilter) && is.null(qFilter)){
    .Call2("myCeScan", as.character(seqnames(tFilter)), 
           start(tFilter), end(tFilter),
           NULL, NULL, NULL,
           NULL, NULL,
           as.character(seqnames(targetRanges(axts))),
           start(targetRanges(axts)), end(targetRanges(axts)),
           as.character(strand(targetRanges(axts))),
           as.character(targetSeqs(axts)),
           as.character(seqnames(queryRanges(axts))),
           start(queryRanges(axts)), end(queryRanges(axts)),
           as.character(strand(queryRanges(axts))),
           as.character(querySeqs(axts)),
           score(axts), symCount(axts), winSize, minScore,
           as.character(resFiles),
           PACKAGE="CNEr")
  }else{
    .Call2("myCeScan", as.character(seqnames(tFilter)), 
           start(tFilter), end(tFilter),
              as.character(seqnames(qFilter)), start(qFilter), end(qFilter),
              as.character(seqnames(qSizes)), as.integer(seqlengths(qSizes)), 
              as.character(seqnames(targetRanges(axts))), 
              start(targetRanges(axts)), end(targetRanges(axts)), 
              as.character(strand(targetRanges(axts))), 
              as.character(targetSeqs(axts)),
              as.character(seqnames(queryRanges(axts))), 
              start(queryRanges(axts)), end(queryRanges(axts)), 
              as.character(strand(queryRanges(axts))), 
              as.character(querySeqs(axts)),
              score(axts), symCount(axts), winSize, minScore, 
              as.character(resFiles),
              PACKAGE="CNEr")
  }
  CNE <- lapply(resFiles, 
               function(x){
                 res <- read.table(x, header=FALSE, sep="\t", as.is=TRUE)
               colnames(res) <- c("tName", "tStart", "tEnd", "qName", "qStart", 
                                 "qEnd", "strand", "score", "cigar")
               return(res)})
  names(CNE) <-  paste(minScore, winSize, sep="_")
  unlink(resFiles)
  return(CNE)
}


### -----------------------------------------------------------------
### Another main function for CNEs identification, 
### but it takes the axt files and bed files as input
### Not exported!
ceScanFile <- function(axtFiles, tFilterFile=NULL, qFilterFile=NULL, 
                       qSizes=NULL,
                       thresholds=c("49_50")){
  ## Here the returned tStart and qStart are 1-based coordinates. 
  ## Of course ends are also 1-based.
  if(!is.null(qFilterFile))
    if(is.null(qSizes) || !is(qSizes, "Seqinfo"))
      stop("qSizes must exist and be a Seqinfo object when qFilter exists")
  winSize <- as.integer(sapply(strsplit(thresholds, "_"), "[", 2))
  minScore <- as.integer(sapply(strsplit(thresholds, "_"), "[", 1))
  resFiles <- tempfile(pattern=paste(minScore, winSize, "ceScan", sep="-"), 
                      tmpdir=tempdir(), fileext="")
  .Call2("myCeScanFile", axtFiles, tFilterFile, qFilterFile, 
        as.character(seqnames(qSizes)), as.character(seqlengths(qSizes)),
        winSize, minScore,
        resFiles, PACKAGE="CNEr")
  CNE <- lapply(resFiles,
               function(x){
                 res <- read.table(x, header=FALSE, sep="\t", as.is=TRUE)
                 colnames(res) <- c("tName", "tStart", "tEnd", "qName", 
                                   "qStart", "qEnd", "strand", "score", "cigar")
                 return(res)})
  names(CNE) <-  paste(minScore, winSize, sep="_")
  unlink(resFiles)
  return(CNE)
}

### -----------------------------------------------------------------
### The S4 methods for ceScan
### Exported!
setMethod("ceScan", signature(axts="Axt", tFilter="GRanges", qFilter="GRanges",
                              qSizes="Seqinfo"),
          function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){
            ceScanR(axts, tFilter=tFilter, qFilter=qFilter, 
                    qSizes=qSizes, thresholds=thresholds)
          }
          )
setMethod("ceScan", signature(axts="Axt", tFilter="missing", qFilter="GRanges",
                              qSizes="Seqinfo"),
          function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){
            ceScanR(axts, tFilter=NULL, qFilter=qFilter,
                    qSizes=qSizes, thresholds=thresholds)
          }
          )
setMethod("ceScan", signature(axts="Axt", tFilter="missing", qFilter="missing",
                              qSizes="missing"),
          function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){
            ceScanR(axts, tFilter=NULL, qFilter=NULL,
                    qSizes=NULL, thresholds=thresholds)
          }
          )
setMethod("ceScan", signature(axts="Axt", tFilter="GRanges", qFilter="missing",
                              qSizes="missing"),
          function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){
            ceScanR(axts, tFilter=tFilter, qFilter=NULL,
                    qSizes=NULL, thresholds=thresholds)
          }
          )
setMethod("ceScan", signature(axts="character", tFilter="character", 
                              qFilter="character", qSizes="Seqinfo"),
          function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){
            ceScanFile(axtFiles=axts, tFilterFile=tFilter, qFilterFile=qFilter,
                       qSizes=qSizes, thresholds=thresholds)
          }
          )
setMethod("ceScan", signature(axts="character", tFilter="missing",
                              qFilter="character", qSizes="Seqinfo"),
          function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){
            ceScanFile(axtFiles=axts, tFilterFile=NULL, qFilterFile=qFilter,
                       qSizes=qSizes, thresholds=thresholds)
          }
          )
setMethod("ceScan", signature(axts="character", tFilter="missing",
                              qFilter="missing", qSizes="missing"),
          function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){
            ceScanFile(axtFiles=axts, tFilterFile=NULL, qFilterFile=NULL,
                       qSizes=NULL, thresholds=thresholds)
          }
          )
setMethod("ceScan", signature(axts="character", tFilter="character",
                              qFilter="missing", qSizes="missing"),
          function(axts, tFilter, qFilter, qSizes, thresholds="49_50"){
            ceScanFile(axtFiles=axts, tFilterFile=tFilter, qFilterFile=NULL,
                       qSizes=NULL, thresholds=thresholds)
          }
          )

### -----------------------------------------------------------------
### Merge two side cnes
### Exported!
cneMerge <- function(cne1, cne2){
  # In this function, cne's start is 1-based coordinates. ends are 1-based too. 
  # Although in cne1 and cne2, query strand can be negative, 
  # but the coordinate is already on positive strand.
  ## first reverse the cne2's cigar
  ## cne2 = transform(cne2, cigar=chartr("DI", "ID", cigar))
  cne2$cigar <- chartr("DI", "ID", cne2$cigar)
  if(any(cne2$strand == "-")){
    #cne2[cne2$strand=="-", ] = transform(subset(cne2, strand=="-"), 
    #                                      cigar=reverseCigar(cigar))
    cne2[cne2$strand == "-", "cigar"] <- 
      reverseCigar(cne2[cne2$strand == "-", "cigar"])
  }
  colnames(cne2) <- c("qName", "qStart", "qEnd", "tName", 
                     "tStart", "tEnd", "strand", "score", "cigar")
  cne1T <- GRanges(seqnames=cne1$tName, 
                   ranges=IRanges(start=cne1$tStart, end=cne1$tEnd), 
                   strand="+")
  cne1Q <- GRanges(seqnames=cne1$qName, 
                   ranges=IRanges(start=cne1$qStart, end=cne1$qEnd), 
                   strand="+")
  cne2T <- GRanges(seqnames=cne2$tName, 
                   ranges=IRanges(start=cne2$tStart, end=cne2$tEnd), 
                   strand="+")
  cne2Q <- GRanges(seqnames=cne2$qName, 
                   ranges=IRanges(start=cne2$qStart, end=cne2$qEnd), 
                   strand="+")
  cneT <- c(cne1T, cne2T)
  cneQ <- c(cne1Q, cne2Q)
  # Here, I just removed the CNEs which are within another big CNEs. 
  # In very rare cases(1 in 100000), some cnes may just connect 
  # and need to merge them. Needs to be done in the future (perhaps not easy to be done in R).
  cneT_overlap <- findOverlaps(cneT, type="within", 
                              ignoreSelf=TRUE, ignoreRedundant=TRUE)
  #cneT_overlap1 = findOverlaps(cneT, type="equal", 
                              #ignoreSelf=TRUE, ignoreRedundant=TRUE)
  #cneT_overlap2 = findOverlaps(cneT, type="any", 
                              #ignoreSelf=TRUE, ignoreRedundant=TRUE)
  cneQ_overlap <- findOverlaps(cneQ, type="within", 
                              ignoreSelf=TRUE, ignoreRedundant=TRUE)
  #cneQ_overlap1 = findOverlaps(cneQ, type="equal", 
                              #ignoreSelf=TRUE, ignoreRedundant=TRUE)
  #cneQ_overlap2 = findOverlaps(cneQ, type="any", 
                              #ignoreSelf=TRUE, ignoreRedundant=TRUE)
  redundance <- IRanges::intersect(cneT_overlap, cneQ_overlap)
  #any_overlap = intersect(cneT_overlap2, cneQ_overlap2)
  #foo = setdiff(any_overlap, redundance)
  #paste(subjectHits(foo), queryHits(foo), sep=",") 
  # %in% paste(queryHits(redundance), subjectHits(redundance), sep=",")
  res <- rbind(cne1, cne2)[-queryHits(redundance), ] 
  # After the merge, we'd better name them as 1 and 2 
  # rather than the tName and qName. Use the names in mysql cne db.
  colnames(res) <- c("chr1", "start1", "end1", "chr2", "start2", "end2", 
                    "strand", "similarity", "cigar")
  return(res)
}

#cutoffs1 = 4
#cutoffs2 = 8

blatCNE <- function(CNE, winSize, cutoffs1, cutoffs2, 
                    assembly1Twobit, assembly2Twobit, 
                    blatOptions=NULL, cutIdentity=90, 
                    tmpDir=tempdir(), blatBinary="blat"){
  # In this function, the input CNE's start and end are 1-based coordinates.
  blatOptionsALL <- list("DEF_BLAT_OPT_WSLO"=
                         "-tileSize=9 -minScore=24 -repMatch=16384",
                         "DEF_BLAT_OPT_WSMID"=
                         "-tileSize=10 -minScore=28 -repMatch=4096",
                         "DEF_BLAT_OPT_WSHI"=
                         "-tileSize=11 -minScore=30 -repMatch=1024")
  if(!is(winSize, "integer")){
    winSize <- as.integer(winSize)
    warning("The windize has been rounded to ", winSize)
  }
  if(is.null(blatOptions)){
    if(winSize > 45L)
      blatOptions <- blatOptionsALL[["DEF_BLAT_OPT_WSHI"]]
    else if(winSize > 35L)
      blatOptions <- blatOptionsALL[["DEF_BLAT_OPT_WSMID"]]
    else
      blatOptions <- blatOptionsALL[["DEF_BLAT_OPT_WSLO"]]
  }
  if(cutIdentity > 100  || cutIdentity < 0)
    stop("cutIdentity must be between 0 and 100!")
  if(!is(cutoffs1, "integer"))
    stop("cutoffs1 must be an integer!")
  if(!is(cutoffs2, "integer"))
    stop("cutoffs2 must be an integer!")
  
  .run_blat <- function(cne, cutIdentity, whichAssembly, 
                        assemblyTwobit, blatBinary, blatOptions, tmpDir){
    temp_cne <- tempfile(pattern="cne-", tmpdir=tmpDir)
    temp_psl <- tempfile(pattern="psl-", tmpdir=tmpDir)
    # For Blat, the start is 0-based and end is 1-based. 
    # So make cne's coordinates to comply with it.
    if(whichAssembly == 1){
      cne <- paste0(assemblyTwobit, ":", 
                    cne[,"chr1"], 
                    ":", format(cne[,"start1"]-1, trim=TRUE, scientific=FALSE), 
                    "-", format(cne[,"end1"], trim=TRUE, scientific=FALSE))
    }else{
      cne <- paste0(assemblyTwobit, ":", 
                    cne[,"chr2"], 
                    ":", format(cne[,"start2"]-1, trim=TRUE, scientific=FALSE), 
                    "-", format(cne[,"end2"], trim=TRUE, scientific=FALSE))
    }
    cne <- unique(cne)
    writeLines(cne, con=temp_cne)
    cmd <- paste0(blatBinary, " ", blatOptions, " ",
                  "-minIdentity=", cutIdentity,
                  " ", assemblyTwobit, " ", temp_cne, " ", temp_psl)
    my.system(cmd)
    unlink(temp_cne)
    return(temp_psl)
  }
  psl1Fn <- .run_blat(CNE, cutIdentity, 1, assembly1Twobit, 
                      blatBinary, blatOptions, tmpDir)
  psl2Fn <- .run_blat(CNE, cutIdentity, 2, assembly2Twobit, 
                      blatBinary, blatOptions, tmpDir)
  psl1 <- read.table(psl1Fn, header=FALSE, sep="\t", skip=5, as.is=TRUE)
  psl2 <- read.table(psl2Fn, header=FALSE, sep="\t", skip=5, as.is=TRUE)
  colnames(psl1) <- colnames(psl2) <- 
    c("matches", "misMatches", "repMatches", "nCount", 
      "qNumInsert", "qBaseInsert", "tNumInsert", "tBaseInsert", 
      "strand", "qName", "qSize", "qStart", "qEnd", "tName", 
      "tSize", "tStart", "tEnd", "blockCount", "blockSizes", 
      "qStarts", "tStarts")
  ##psl1 = subset(psl1, matches/qSize >= cutIdentity/100)
  ##for some reason, subset will cause error duing the Bioconductor build
  psl1 <- psl1[psl1$matches / psl1$qSize >= cutIdentity/100, ]
  ##psl2 = subset(psl2, matches/qSize >= cutIdentity/100)
  psl2 <- psl2[psl2$matches / psl2$qSize >= cutIdentity/100, ]
  psl1 <- table(psl1[ , "qName"])
  psl2 <- table(psl2[ , "qName"])
  CNEtNameIndex <- unname(psl1[paste0(CNE[ ,1], ":", CNE[ ,2]-1, "-", 
                                      CNE[ ,3])])
  CNEtNameIndex[is.na(CNEtNameIndex)] <- 0
  CNEqNameIndex <- unname(psl2[paste0(CNE[ ,4], ":", CNE[ ,5]-1, "-", 
                                      CNE[ ,6])])
  CNEqNameIndex[is.na(CNEqNameIndex)] <- 0
  CNE <- CNE[CNEtNameIndex <= cutoffs1 & CNEqNameIndex <= cutoffs2, ]
  return(CNE)
  # Here, the CNE's starts and ends are still 1-based.
}


ceScanOneStep <- function(axt1, filter1=NULL, sizes1, assembly1, twoBit1,
                          axt2, filter2=NULL, sizes2, assembly2, twoBit2,
                          thresholds=c("49_50"),
                          blatBinary="blat",
                          blatCutoff1, blatCutoff2
                          ){
  if(grepl("_", assembly1) || grepl("_", assembly2))
    stop("The assembly name must not contain \"_\"")
  if(assembly1 < assembly2){
    .ceScanSwap(axt1=axt1, filter1=filter1, sizes1=sizes1, assembly1=assembly1,
                twoBit1=twoBit1, axt2=axt2, filter2=filter2, sizes2=sizes2,
                assembly2=assembly2, twoBit2=twoBit2, thresholds=thresholds,
                blatBinary=blatBinary, blatCutoff1=blatCutoff1, 
                blatCutoff2=blatCutoff2)
  }else{
    .ceScanSwap(axt1=axt2, filter1=filter2, sizes1=sizes2, assembly1=assembly2,
                twoBit1=twoBit2, axt2=axt1, filter2=filter1,
                sizes2=sizes1, assembly2=assembly1, twoBit2=twoBit1,
                thresholds=thresholds, blatBinary=blatBinary,
                blatCutoff1=blatCutoff2, blatCutoff2=blatCutoff1)
  }
}

.ceScanSwap <- function(axt1, filter1=NULL, sizes1, assembly1, twoBit1,
                        axt2, filter2=NULL, sizes2, assembly2, twoBit2,
                        thresholds=c("49_50"),
                        blatBinary="blat",
                        blatCutoff1, blatCutoff2
                        ){
  ## In this function, we make sure "assembly1" is smaller than "assembly2".
  ## danRer7 is smaller than hg19.
  ## This is just for easier database storage table name: "danRer7_hg19_49_50"
  CNE1 <- ceScan(axt1, filter1, filter2, sizes2, thresholds)
  CNE2 <- ceScan(axt2, filter2, filter1, sizes1, thresholds)
  CNEMerged <- mapply(cneMerge, CNE1, CNE2, SIMPLIFY=FALSE)
  names(CNEMerged) = paste(assembly1, assembly2, thresholds, sep="_")
  CNEBlated <- list()
  for(i in 1:length(CNEMerged)){
    CNEBlated[[names(CNEMerged)[i]]] <- 
      blatCNE(CNEMerged[[i]], as.integer(sub(".+_.+_\\d+_", "", 
                                           names(CNEMerged)[i])),
              cutoffs1=blatCutoff1, cutoffs2=blatCutoff2,
              assembly1Twobit=twoBit1, assembly2Twobit=twoBit2,
              blatBinary=blatBinary)
  }
  ans <- CNE(assembly1=assembly1, assembly2=assembly2,
             thresholds=thresholds,
             CNE1=CNE1, CNE2=CNE2, CNEMerged=CNEMerged,
             CNERepeatsFiltered=CNEBlated,
             alignMethod=blatBinary)
  return(ans)
}