git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@74871 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -2,7 +2,7 @@ SUBDIRS = gmap gstruct |
2 | 2 |
|
3 | 3 |
.PHONY: all clean $(SUBDIRS) |
4 | 4 |
|
5 |
-OBJECTS = bamreader.o bamtally.o iit.o variantsummary.o R_init_gmapR.o |
|
5 |
+OBJECTS = bamreader.o bamtally.o iit.o variantsummary.o genome.o R_init_gmapR.o |
|
6 | 6 |
|
7 | 7 |
R_SRC_DIR = ${CURDIR} |
8 | 8 |
PREFIX = ${R_SRC_DIR}/../inst/usr |
... | ... |
@@ -7,11 +7,14 @@ |
7 | 7 |
static const R_CallMethodDef callMethods[] = { |
8 | 8 |
|
9 | 9 |
/* bamtally.c */ |
10 |
- CALLMETHOD_DEF(R_Bamtally_iit, 17), |
|
10 |
+ CALLMETHOD_DEF(R_Bamtally_iit, 18), |
|
11 | 11 |
CALLMETHOD_DEF(R_tally_iit_parse, 4), |
12 | 12 |
|
13 | 13 |
/* bamreader.c */ |
14 | 14 |
CALLMETHOD_DEF(R_Bamread_new, 1), |
15 |
+ |
|
16 |
+ /* genome.c */ |
|
17 |
+ CALLMETHOD_DEF(R_Genome_getSeq, 6), |
|
15 | 18 |
|
16 | 19 |
{NULL, NULL, 0} |
17 | 20 |
}; |
... | ... |
@@ -1,17 +1,13 @@ |
1 |
-#include <stdlib.h> |
|
2 |
-#include <string.h> |
|
3 |
- |
|
4 | 1 |
#include <gstruct/bool.h> |
5 |
-#include <gstruct/datadir.h> |
|
6 | 2 |
#include <gstruct/iit-read.h> |
7 | 3 |
#include <gstruct/genome.h> |
8 |
-#include <gstruct/interval.h> |
|
9 | 4 |
#include <gstruct/genomicpos.h> |
10 | 5 |
#include <gstruct/bamread.h> |
11 | 6 |
#include <gstruct/bamtally.h> |
12 | 7 |
|
13 | 8 |
#include "gmapR.h" |
14 | 9 |
#include "iit.h" |
10 |
+#include "genome.h" |
|
15 | 11 |
|
16 | 12 |
SEXP |
17 | 13 |
R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
... | ... |
@@ -20,7 +16,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
20 | 16 |
SEXP minimum_mapq_R, SEXP good_unique_mapq_R, |
21 | 17 |
SEXP maximum_nhits_R, |
22 | 18 |
SEXP need_concordant_p_R, SEXP need_unique_p_R, |
23 |
- SEXP need_primary_p_R, |
|
19 |
+ SEXP need_primary_p_R, SEXP ignore_duplicates_p_R, |
|
24 | 20 |
SEXP min_depth_R, SEXP variant_strands_R, |
25 | 21 |
SEXP ignore_query_Ns_p_R, |
26 | 22 |
SEXP print_indels_p_R, |
... | ... |
@@ -38,30 +34,16 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
38 | 34 |
bool need_concordant_p = asLogical(need_concordant_p_R); |
39 | 35 |
bool need_unique_p = asLogical(need_unique_p_R); |
40 | 36 |
bool need_primary_p = asLogical(need_primary_p_R); |
37 |
+ bool ignore_duplicates_p = asLogical(ignore_duplicates_p_R); |
|
41 | 38 |
int min_depth = asInteger(min_depth_R); |
42 | 39 |
int variant_strands = asInteger(variant_strands_R); |
43 | 40 |
bool ignore_query_Ns_p = asLogical(ignore_query_Ns_p_R); |
44 | 41 |
bool print_indels_p = asLogical(print_indels_p_R); |
45 | 42 |
int blocksize = asInteger(blocksize_R); |
46 | 43 |
int verbosep = asLogical(verbosep_R); |
47 |
- |
|
48 |
- char *genomesubdir, *fileroot, *dbversion, *iitfile; |
|
49 |
- genomesubdir = Datadir_find_genomesubdir(&fileroot, &dbversion, |
|
50 |
- (char *)genome_dir, (char *)db); |
|
51 |
- |
|
52 |
- iitfile = (char *) calloc(strlen(genomesubdir) + strlen("/") + |
|
53 |
- strlen(fileroot) + strlen(".chromosome.iit") + 1, |
|
54 |
- sizeof(char)); |
|
55 |
- sprintf(iitfile, "%s/%s.chromosome.iit", genomesubdir, fileroot); |
|
56 |
- IIT_T chromosome_iit = IIT_read(iitfile, /*name*/NULL, /*readonlyp*/true, |
|
57 |
- /*divread*/READ_ALL, /*divstring*/NULL, |
|
58 |
- /*add_iit_p*/false, /*labels_read_p*/true); |
|
59 |
- Genome_T genome = Genome_new(genomesubdir, fileroot, /*snps_root*/NULL, |
|
60 |
- /*uncompressedp*/false, /*access*/USE_MMAP_ONLY); |
|
61 |
- free(iitfile); |
|
62 |
- free(fileroot); |
|
63 |
- free(dbversion); |
|
64 |
- free(genomesubdir); |
|
44 |
+ |
|
45 |
+ Genome_T genome = createGenome(genome_dir, db); |
|
46 |
+ IIT_T chromosome_iit = readChromosomeIIT(genome_dir, db); |
|
65 | 47 |
|
66 | 48 |
const char *chr = NULL; |
67 | 49 |
Genomicpos_T start = 0; |
... | ... |
@@ -78,6 +60,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
78 | 60 |
alloclength, minimum_mapq, good_unique_mapq, |
79 | 61 |
maximum_nhits, need_concordant_p, |
80 | 62 |
need_unique_p, need_primary_p, |
63 |
+ ignore_duplicates_p, |
|
81 | 64 |
min_depth, variant_strands, ignore_query_Ns_p, |
82 | 65 |
print_indels_p, blocksize, verbosep); |
83 | 66 |
IIT_free(&chromosome_iit); |
84 | 67 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,116 @@ |
1 |
+#include <stdlib.h> |
|
2 |
+#include <string.h> |
|
3 |
+ |
|
4 |
+#include <gstruct/bool.h> |
|
5 |
+#include <gstruct/datadir.h> |
|
6 |
+#include <gstruct/iit-read.h> |
|
7 |
+#include <gstruct/genome.h> |
|
8 |
+#include <gstruct/interval.h> |
|
9 |
+#include <gstruct/genomicpos.h> |
|
10 |
+#include <gstruct/complement.h> |
|
11 |
+ |
|
12 |
+#include "gmapR.h" |
|
13 |
+#include "genome.h" |
|
14 |
+ |
|
15 |
+IIT_T |
|
16 |
+readChromosomeIIT (const char *genome_dir, const char *db) { |
|
17 |
+ char *genomesubdir, *fileroot, *dbversion, *iitfile; |
|
18 |
+ genomesubdir = Datadir_find_genomesubdir(&fileroot, &dbversion, |
|
19 |
+ (char *)genome_dir, (char *)db); |
|
20 |
+ |
|
21 |
+ iitfile = (char *) calloc(strlen(genomesubdir) + strlen("/") + |
|
22 |
+ strlen(fileroot) + strlen(".chromosome.iit") + 1, |
|
23 |
+ sizeof(char)); |
|
24 |
+ sprintf(iitfile, "%s/%s.chromosome.iit", genomesubdir, fileroot); |
|
25 |
+ IIT_T chromosome_iit = IIT_read(iitfile, /*name*/NULL, /*readonlyp*/true, |
|
26 |
+ /*divread*/READ_ALL, /*divstring*/NULL, |
|
27 |
+ /*add_iit_p*/false, /*labels_read_p*/true); |
|
28 |
+ free(iitfile); |
|
29 |
+ free(fileroot); |
|
30 |
+ free(dbversion); |
|
31 |
+ free(genomesubdir); |
|
32 |
+ |
|
33 |
+ return chromosome_iit; |
|
34 |
+} |
|
35 |
+ |
|
36 |
+Genome_T |
|
37 |
+createGenome (const char *genome_dir, const char *db) { |
|
38 |
+ char *genomesubdir, *fileroot, *dbversion; |
|
39 |
+ genomesubdir = Datadir_find_genomesubdir(&fileroot, &dbversion, |
|
40 |
+ (char *)genome_dir, (char *)db); |
|
41 |
+ |
|
42 |
+ Genome_T genome = Genome_new(genomesubdir, fileroot, /*snps_root*/NULL, |
|
43 |
+ /*uncompressedp*/false, /*access*/USE_MMAP_ONLY); |
|
44 |
+ free(fileroot); |
|
45 |
+ free(dbversion); |
|
46 |
+ free(genomesubdir); |
|
47 |
+ |
|
48 |
+ return genome; |
|
49 |
+} |
|
50 |
+ |
|
51 |
+static char complCode[128] = COMPLEMENT_LC; |
|
52 |
+ |
|
53 |
+static void |
|
54 |
+make_complement_inplace (char *sequence, Genomicpos_T length) { |
|
55 |
+ char temp; |
|
56 |
+ int i, j; |
|
57 |
+ |
|
58 |
+ for (i = 0, j = length-1; i < length/2; i++, j--) { |
|
59 |
+ temp = complCode[(int) sequence[i]]; |
|
60 |
+ sequence[i] = complCode[(int) sequence[j]]; |
|
61 |
+ sequence[j] = temp; |
|
62 |
+ } |
|
63 |
+ if (i == j) { |
|
64 |
+ sequence[i] = complCode[(int) sequence[i]]; |
|
65 |
+ } |
|
66 |
+ |
|
67 |
+ return; |
|
68 |
+} |
|
69 |
+ |
|
70 |
+SEXP |
|
71 |
+R_Genome_getSeq (SEXP genome_dir_R, SEXP db_R, |
|
72 |
+ SEXP seqnames_R, SEXP start_R, SEXP width_R, SEXP strand_R) |
|
73 |
+{ |
|
74 |
+ const char *genome_dir = |
|
75 |
+ genome_dir_R == R_NilValue ? NULL : CHAR(asChar(genome_dir_R)); |
|
76 |
+ const char *db = CHAR(asChar(db_R)); |
|
77 |
+ Genome_T genome = createGenome(genome_dir, db); |
|
78 |
+ IIT_T chromosome_iit = readChromosomeIIT(genome_dir, db); |
|
79 |
+ int *start = INTEGER(start_R); |
|
80 |
+ int *width = INTEGER(width_R); |
|
81 |
+ |
|
82 |
+ int max_width = 0; |
|
83 |
+ for (int i = 0; i < length(width_R); i++) { |
|
84 |
+ if (width[i] > max_width) |
|
85 |
+ max_width = width[i]; |
|
86 |
+ } |
|
87 |
+ char *buffer = (char *) R_alloc(sizeof(char), max_width + 1); |
|
88 |
+ |
|
89 |
+ int seqname_index; |
|
90 |
+ const char *old_seqname = NULL; // premature optimization |
|
91 |
+ SEXP result; |
|
92 |
+ PROTECT(result = allocVector(STRSXP, length(start_R))); |
|
93 |
+ for (int i = 0; i < length(start_R); i++) { |
|
94 |
+ const char *seqname = CHAR(STRING_ELT(seqnames_R, i)); |
|
95 |
+ if (old_seqname == NULL || strcmp(seqname, old_seqname)) { |
|
96 |
+ seqname_index = IIT_find_linear(chromosome_iit, (char *)seqname); |
|
97 |
+ if (seqname_index < 0) { |
|
98 |
+ error("Cannot find chromosome %s in genome", seqname); |
|
99 |
+ } |
|
100 |
+ } |
|
101 |
+ old_seqname = seqname; |
|
102 |
+ int chroffset = Interval_low(IIT_interval(chromosome_iit, seqname_index)); |
|
103 |
+ Genome_fill_buffer_simple(genome, chroffset + start[i] - 1, width[i], |
|
104 |
+ buffer); |
|
105 |
+ if (CHAR(STRING_ELT(strand_R, i))[0] == '-') |
|
106 |
+ make_complement_inplace(buffer, width[i]); |
|
107 |
+ SET_STRING_ELT(result, i, mkChar(buffer)); |
|
108 |
+ } |
|
109 |
+ |
|
110 |
+ IIT_free(&chromosome_iit); |
|
111 |
+ Genome_free(&genome); |
|
112 |
+ |
|
113 |
+ UNPROTECT(1); |
|
114 |
+ return result; |
|
115 |
+} |
|
116 |
+ |
0 | 117 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,12 @@ |
1 |
+#ifndef GENOME_H |
|
2 |
+#define GENOME_H |
|
3 |
+ |
|
4 |
+#include <gstruct/iit-read.h> |
|
5 |
+#include <gstruct/genome.h> |
|
6 |
+ |
|
7 |
+/* Internal genome helper functions */ |
|
8 |
+ |
|
9 |
+IIT_T readChromosomeIIT (const char *genome_dir, const char *db); |
|
10 |
+Genome_T createGenome (const char *genome_dir, const char *db); |
|
11 |
+ |
|
12 |
+#endif |
... | ... |
@@ -15,7 +15,7 @@ R_Bamtally_iit (SEXP bamreader_R, SEXP genome_dir_R, SEXP db_R, |
15 | 15 |
SEXP minimum_mapq_R, SEXP good_unique_mapq_R, |
16 | 16 |
SEXP maximum_nhits_R, |
17 | 17 |
SEXP need_concordant_p_R, SEXP need_unique_p_R, |
18 |
- SEXP need_primary_p_R, |
|
18 |
+ SEXP need_primary_p_R, SEXP ignore_duplicates_p_R, |
|
19 | 19 |
SEXP min_depth_R, SEXP variant_strands_R, |
20 | 20 |
SEXP ignore_query_Ns_p, |
21 | 21 |
SEXP print_indels_p_R, |
... | ... |
@@ -26,4 +26,8 @@ SEXP |
26 | 26 |
R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R, |
27 | 27 |
SEXP high_base_quality, SEXP which_R); |
28 | 28 |
|
29 |
+SEXP |
|
30 |
+R_Genome_getSeq (SEXP genome_dir_R, SEXP db_R, |
|
31 |
+ SEXP seqnames_R, SEXP start_R, SEXP width_R, SEXP strand_R); |
|
32 |
+ |
|
29 | 33 |
#endif |