git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/CNEr@89395 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: CNEr |
2 |
-Version: 1.1.1 |
|
3 |
-Date: 2014-03-12 |
|
2 |
+Version: 1.1.2 |
|
3 |
+Date: 2014-03-23 |
|
4 | 4 |
Title: CNE detection and visualization. |
5 | 5 |
Description: Large-scale identification and advanced visualization of sets of conserved noncoding elements. |
6 | 6 |
Author: Ge Tan <ge.tan09@imperial.ac.uk> |
... | ... |
@@ -19,4 +19,5 @@ NeedsCompilation: yes |
19 | 19 |
LazyData: no |
20 | 20 |
Collate: AllGenerics.R AllClasses.R utils.R ceScan.R plot.R |
21 | 21 |
makeGeneDbFromUCSC.R io.R scoringMatrix.R subAxt-methods.R |
22 |
+ Axt-methods.R |
|
22 | 23 |
|
... | ... |
@@ -12,6 +12,7 @@ setGeneric("subAxt", function(x, chr, start, end, #strand=c("+", "-", "*"), |
12 | 12 |
qSize=NULL) |
13 | 13 |
standardGeneric("subAxt") |
14 | 14 |
) |
15 |
### S4 generic matchDistr(x); methods are selected on the class of `x`.
setGeneric(
  name = "matchDistr",
  def = function(x) standardGeneric("matchDistr")
)
|
15 | 16 |
|
16 | 17 |
### ----------------------------------------------------------------- |
17 | 18 |
### CNE class related |
18 | 19 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,22 @@ |
1 |
+### ----------------------------------------------------------------- |
|
2 |
+### summary function for Axt |
|
3 |
+### Not Exported!
|
4 |
+### This implementation is too slow for large Axt. Should implement in C.
|
5 |
### Relative frequency of aligned symbol pairs in an Axt object.
###
### Each alignment column contributes one pair made of the target symbol
### followed by the query symbol. The result is a 6 x 6 numeric matrix over
### the symbols A, C, G, T, "-" and N: rows are indexed by the target symbol,
### columns by the query symbol, and the entries sum to 1.
###
### x: an Axt object. targetSeqs(x) and querySeqs(x) are assumed to coerce
###    via as.character() to character vectors of equally long aligned
###    sequences -- TODO confirm against the Axt class definition.
###
### Returns the frequency matrix. When `x` holds no alignment column at all
### the all-zero matrix is returned (nothing to normalise).
###
### NOTE: the lookup is by exact symbol name, so a pair containing any other
### symbol (presumably including lower-case bases) stops with a
### "subscript out of bounds" error, as before.
setMethod("matchDistr", signature(x="Axt"),
          function(x){
            symbols <- c("A", "C", "G", "T", "-", "N")
            freqMatrix <- matrix(0, ncol=6, nrow=6,
                                 dimnames=list(symbols, symbols))
            ## SIMPLIFY=FALSE keeps the result a plain list whatever the
            ## alignment lengths are; names are not needed for counting.
            symbolPairs <- unlist(mapply(paste0,
                             strsplit(as.character(targetSeqs(x)), ""),
                             strsplit(as.character(querySeqs(x)), ""),
                             SIMPLIFY=FALSE, USE.NAMES=FALSE))
            ## No columns: avoid 0/0 and iterating over an empty table.
            if(length(symbolPairs) == 0L){
              return(freqMatrix)
            }
            matchFreq <- table(symbolPairs)
            matchFreq <- matchFreq / sum(matchFreq)
            for(i in seq_along(matchFreq)){
              ## names are two-character strings: target symbol, query symbol
              splittedNames <- strsplit(names(matchFreq)[i], "")[[1]]
              freqMatrix[splittedNames[1], splittedNames[2]] <-
                matchFreq[i]
            }
            return(freqMatrix)
          }
          )
|
22 |
+ |
0 | 23 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,63 @@ |
1 |
+/******************************************************************** |
|
2 |
+ * Alignment related function |
|
3 |
+ * Author: Ge Tan |
|
4 |
+ *******************************************************************/ |
|
5 |
+#include "Rdefines.h" |
|
6 |
+ |
|
7 |
+SEXP subAlignment(SEXP axttStart, SEXP axttEnd, SEXP axttSym, |
|
8 |
+ SEXP axtqStart, SEXP axtqEnd, SEXP axtqSym, |
|
9 |
+ SEXP subtStart, SEXP subtEnd, SEXP symCount){ |
|
10 |
+// The input must have same length and subtStart,subtEnd must be inside |
|
11 |
+// the range of axttStart,axttEnd. |
|
12 |
+ int nrAxt = GET_LENGTH(symCount); |
|
13 |
+ int i, j; |
|
14 |
+ SEXP returnList; |
|
15 |
+ PROTECT(returnList = NEW_LIST(6)); |
|
16 |
+ SEXP newtStart, newtEnd, newtSym, newqStart, newqEnd, newqSym; |
|
17 |
+ newtStart = NEW_INTEGER(nrAxt); |
|
18 |
+ newtEnd = NEW_INTEGER(nrAxt); |
|
19 |
+ newtSym = NEW_CHARACTER(nrAxt); |
|
20 |
+ newqStart = NEW_INTEGER(nrAxt); |
|
21 |
+ newqEnd = NEW_INTEGER(nrAxt); |
|
22 |
+ newqSym = NEW_CHARACTER(nrAxt); |
|
23 |
+ SET_VECTOR_ELT(returnList, 0, newtStart); |
|
24 |
+ SET_VECTOR_ELT(returnList, 1, newtEnd); |
|
25 |
+ SET_VECTOR_ELT(returnList, 2, newtSym); |
|
26 |
+ SET_VECTOR_ELT(returnList, 3, newqStart); |
|
27 |
+ SET_VECTOR_ELT(returnList, 4, newqEnd); |
|
28 |
+ SET_VECTOR_ELT(returnList, 5, newqSym); |
|
29 |
+ int nrGapsTarget, nrGapsQuery, cpStartTarget, cpEndTarget, cpStartQuery, cpEndQuery; |
|
30 |
+ for(i=0; i<nrAxt; i++){ |
|
31 |
+ // Rprintf("The char %d \n", i); |
|
32 |
+ nrGapsTarget = 0; |
|
33 |
+ nrGapsQuery = 0; |
|
34 |
+ cpStartTarget = 0; |
|
35 |
+ cpEndTarget = 0; |
|
36 |
+ for(j=0; j<INTEGER(symCount)[i]; j++){ |
|
37 |
+ if(CHAR(STRING_ELT(axttSym, i))[j] == '_' || |
|
38 |
+ CHAR(STRING_ELT(axttSym, i))[j] == '-'){ |
|
39 |
+ //Rprintf("%c ", CHAR(STRING_ELT(axttSym, i))[j]); |
|
40 |
+ nrGapsTarget++; |
|
41 |
+ } |
|
42 |
+ if(CHAR(STRING_ELT(axtqSym, i))[j] == '_' || |
|
43 |
+ CHAR(STRING_ELT(axtqSym, i))[j] == '-'){ |
|
44 |
+ nrGapsQuery++; |
|
45 |
+ } |
|
46 |
+ if(INTEGER(subtStart) == INTEGER(axttStart) + j - nrGapsTarget){ |
|
47 |
+ cpStartTarget = j; |
|
48 |
+ INTEGER(newqStart) = j + INTEGER(axtqStart) |
|
49 |
+ } |
|
50 |
+ if(INTEGER(subtEnd) == INTEGER(axttStart) + j - nrGapsTarget){ |
|
51 |
+ cpENdTarget = j; |
|
52 |
+ } |
|
53 |
+ |
|
54 |
+ } |
|
55 |
+ Rprintf("\n"); |
|
56 |
+ } |
|
57 |
+ UNPROTECT(1); |
|
58 |
+ return(R_NilValue); |
|
59 |
+} |
|
60 |
+ |
|
61 |
+ |
|
62 |
+ |
|
63 |
+ |