git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@125153 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,3 +1,4 @@ |
1 |
+#include <stdlib.h> |
|
1 | 2 |
#include <gstruct/iit-read.h> |
2 | 3 |
|
3 | 4 |
#include "iit.h" |
... | ... |
@@ -14,14 +15,308 @@ SEXP R_IIT_new(IIT_T iit) { |
14 | 15 |
return iit_R; |
15 | 16 |
} |
16 | 17 |
|
17 |
-SEXP R_iit_read(SEXP iitfile_R) { |
|
18 |
+/* |
|
19 |
+ IIT API: |
|
20 |
+ * IIT_read |
|
21 |
+ * IIT_annotation |
|
22 |
+ * IIT_interval(iit, which), Interval_low, Interval_length, Interval_sign |
|
23 |
+ |
|
24 |
+ Coordinate mapping (read divs, not labels): |
|
25 |
+ Any overlap: |
|
26 |
+ * IIT_get(&(*nmatches),*iit,*divstring,*coordstart,*coordend,sortp); |
|
27 |
+ By strand: |
|
28 |
+ * IIT_get_signed() |
|
29 |
+ Exact coordinate match: |
|
30 |
+ * IIT_get_exact_multiple() |
|
31 |
+ |
|
32 |
+ Can restrict by "type" of feature: |
|
33 |
+ * typeint = IIT_typeint(*iit,typestring) |
|
34 |
+ * IIT_get_typed(&(*nmatches),*iit,*divstring,*coordstart,*coordend,typeint, |
|
35 |
+ sortp); |
|
36 |
+ * IIT_get_typed_signed() |
|
37 |
+ |
|
38 |
+ Restrict by label (read labels, not divs): |
|
39 |
+ * IIT_find(&(*nmatches),iit,query); |
|
40 |
+ |
|
41 |
+ Read standard annotation: |
|
42 |
+ * field = IIT_annotation(&restofheader,iit,index,&allocp); |
|
43 |
+ Or a specific field: |
|
44 |
+ * fieldint = IIT_fieldint(iit,fieldstring) |
|
45 |
+ * field = IIT_fieldvalue(iit,index,fieldint); |
|
46 |
+ |
|
47 |
+ Get labels: |
|
48 |
+ * IIT_label(iit, index, &allocp) |
|
49 |
+ |
|
50 |
+ Introspection: |
|
51 |
+ Metadata: |
|
52 |
+ * IIT_name |
|
53 |
+ * IIT_version |
|
54 |
+ * IIT_total_nintervals |
|
55 |
+ Types: |
|
56 |
+ * IIT_ntypes(iit) |
|
57 |
+ * IIT_typestring (iit, typeint); |
|
58 |
+ Fields: |
|
59 |
+ * IIT_nfields(iit) |
|
60 |
+ * IIT_fieldstring(iit, fieldint); |
|
61 |
+*/ |
|
62 |
+ |
|
63 |
+typedef struct IITMatches { |
|
64 |
+ IIT_T iit; |
|
65 |
+ int **subscripts; |
|
66 |
+ int *nsubscripts; |
|
67 |
+ int nqueries; |
|
68 |
+} IITMatches; |
|
69 |
+ |
|
70 |
+static IITMatches _new_IITMatches(IIT_T iit, int nqueries) { |
|
71 |
+ IITMatches matches; |
|
72 |
+ matches.iit = iit; |
|
73 |
+ matches.subscripts = (int **) R_alloc(sizeof(int *), nqueries); |
|
74 |
+ matches.nsubscripts = (int *) R_alloc(sizeof(int), nqueries); |
|
75 |
+ matches.nqueries = nqueries; |
|
76 |
+ return matches; |
|
77 |
+} |
|
78 |
+ |
|
79 |
+SEXP R_iit_open(SEXP iitfile_R, SEXP divread_R, SEXP labels_read_R) { |
|
18 | 80 |
char *iitfile = (char *) CHAR(asChar(iitfile_R)); |
81 |
+ int divread = asLogical(divread_R) ? READ_ALL : READ_NONE; |
|
82 |
+ bool labels_read = asLogical(labels_read_R); |
|
19 | 83 |
IIT_T iit = IIT_read(iitfile, /*name*/NULL, /*readonlyp*/true, |
20 |
- /*divread*/READ_ALL, /*divstring*/NULL, |
|
21 |
- /*add_iit_p*/false, /*labels_read_p*/true); |
|
84 |
+ divread, /*divstring*/NULL, |
|
85 |
+ /*add_iit_p*/false, labels_read); |
|
22 | 86 |
return R_IIT_new(iit); |
23 | 87 |
} |
24 | 88 |
|
89 |
+static IITMatches _iit_find(IIT_T iit, SEXP which_R) { |
|
90 |
+ IITMatches matches = _new_IITMatches(iit, length(which_R)); |
|
91 |
+ for (int i = 0; i < length(which_R); i++) { |
|
92 |
+ matches.subscripts[i] = |
|
93 |
+ IIT_find(matches.nsubscripts + i, iit, |
|
94 |
+ (char *)CHAR(STRING_ELT(which_R, i))); |
|
95 |
+ } |
|
96 |
+ return matches; |
|
97 |
+} |
|
98 |
+ |
|
99 |
+static IITMatches _iit_get_exact_multiple(IIT_T iit, |
|
100 |
+ SEXP chr_R, int *start, int *end, |
|
101 |
+ int type) |
|
102 |
+{ |
|
103 |
+ IITMatches matches = _new_IITMatches(iit, length(chr_R)); |
|
104 |
+ for (int i = 0; i < length(chr_R); i++) { |
|
105 |
+ matches.subscripts[i] = |
|
106 |
+ IIT_get_exact_multiple(matches.nsubscripts + i, iit, |
|
107 |
+ (char *)CHAR(STRING_ELT(chr_R, i)), |
|
108 |
+ start[i], end[i], type); |
|
109 |
+ } |
|
110 |
+ return matches; |
|
111 |
+} |
|
112 |
+ |
|
113 |
+static IITMatches _iit_get_typed(IIT_T iit, |
|
114 |
+ SEXP chr_R, int *start, int *end, |
|
115 |
+ int type) |
|
116 |
+{ |
|
117 |
+ IITMatches matches = _new_IITMatches(iit, length(chr_R)); |
|
118 |
+ for (int i = 0; i < length(chr_R); i++) { |
|
119 |
+ matches.subscripts[i] = |
|
120 |
+ IIT_get_typed(matches.nsubscripts + i, iit, |
|
121 |
+ (char *)CHAR(STRING_ELT(chr_R, i)), |
|
122 |
+ start[i], end[i], type, |
|
123 |
+ /*sortp*/false); |
|
124 |
+ } |
|
125 |
+ return matches; |
|
126 |
+} |
|
127 |
+ |
|
128 |
+static IITMatches _iit_get(IIT_T iit, SEXP chr_R, int *start, int *end) |
|
129 |
+{ |
|
130 |
+ IITMatches matches = _new_IITMatches(iit, length(chr_R)); |
|
131 |
+ for (int i = 0; i < length(chr_R); i++) { |
|
132 |
+ matches.subscripts[i] = |
|
133 |
+ IIT_get(matches.nsubscripts + i, iit, |
|
134 |
+ (char *)CHAR(STRING_ELT(chr_R, i)), |
|
135 |
+ start[i], end[i], |
|
136 |
+ /*sortp*/false); |
|
137 |
+ } |
|
138 |
+ return matches; |
|
139 |
+} |
|
140 |
+ |
|
141 |
+static IITMatches _iit_get_typed_signed(IIT_T iit, |
|
142 |
+ SEXP chr_R, int *start, int *end, |
|
143 |
+ int type, int *sign) |
|
144 |
+{ |
|
145 |
+ IITMatches matches = _new_IITMatches(iit, length(chr_R)); |
|
146 |
+ for (int i = 0; i < length(chr_R); i++) { |
|
147 |
+ matches.subscripts[i] = |
|
148 |
+ IIT_get_typed_signed(matches.nsubscripts + i, iit, |
|
149 |
+ (char *)CHAR(STRING_ELT(chr_R, i)), |
|
150 |
+ start[i], end[i], type, sign[i], |
|
151 |
+ /*sortp*/false); |
|
152 |
+ } |
|
153 |
+ return matches; |
|
154 |
+} |
|
155 |
+ |
|
156 |
+static IITMatches _iit_get_signed(IIT_T iit, |
|
157 |
+ SEXP chr_R, int *start, int *end, |
|
158 |
+ int *sign) |
|
159 |
+{ |
|
160 |
+ IITMatches matches = _new_IITMatches(iit, length(chr_R)); |
|
161 |
+ for (int i = 0; i < length(chr_R); i++) { |
|
162 |
+ matches.subscripts[i] = |
|
163 |
+ IIT_get_signed(matches.nsubscripts + i, iit, |
|
164 |
+ (char *)CHAR(STRING_ELT(chr_R, i)), |
|
165 |
+ start[i], end[i], sign[i], |
|
166 |
+ /*sortp*/false); |
|
167 |
+ } |
|
168 |
+ return matches; |
|
169 |
+} |
|
170 |
+ |
|
171 |
+static IITMatches _iit_get_for_coords(IIT_T iit, SEXP which_R, SEXP type_R, |
|
172 |
+ SEXP ignore_strand_R, SEXP exact_R) |
|
173 |
+{ |
|
174 |
+ SEXP chr_R = VECTOR_ELT(which_R, 0); |
|
175 |
+ int *start = INTEGER(VECTOR_ELT(which_R, 1)); |
|
176 |
+ int *end = INTEGER(VECTOR_ELT(which_R, 2)); |
|
177 |
+ int *sign = INTEGER(VECTOR_ELT(which_R, 3)); |
|
178 |
+ int type = type_R == R_NilValue ? 0 : |
|
179 |
+ IIT_typeint(iit, (char *)CHAR(asChar(type_R))); |
|
180 |
+ bool ignore_strand = asLogical(ignore_strand_R); |
|
181 |
+ bool exact = asLogical(exact_R); |
|
182 |
+ IITMatches matches; |
|
183 |
+ |
|
184 |
+ if (exact) { |
|
185 |
+ /* sign filtering happens in R */ |
|
186 |
+ matches = _iit_get_exact_multiple(iit, chr_R, start, end, type); |
|
187 |
+ } else if (ignore_strand) { |
|
188 |
+ if (type > 0) { |
|
189 |
+ matches = _iit_get_typed(iit, chr_R, start, end, type); |
|
190 |
+ } else { |
|
191 |
+ matches = _iit_get(iit, chr_R, start, end); |
|
192 |
+ } |
|
193 |
+ } else { |
|
194 |
+ if (type > 0) { |
|
195 |
+ matches = _iit_get_typed_signed(iit, chr_R, start, end, type, sign); |
|
196 |
+ } else { |
|
197 |
+ matches = _iit_get_signed(iit, chr_R, start, end, sign); |
|
198 |
+ } |
|
199 |
+ } |
|
200 |
+ |
|
201 |
+ return matches; |
|
202 |
+} |
|
203 |
+ |
|
204 |
+static IITMatches _iit_get_for_labels(IIT_T iit, SEXP which_R) { |
|
205 |
+ return _iit_find(iit, which_R); |
|
206 |
+} |
|
207 |
+ |
|
208 |
+enum { CHR, START, WIDTH, STRAND, ANNO, ANS_LENGTH }; |
|
209 |
+ |
|
210 |
+static SEXP _convert_matches(IITMatches matches, bool ret_ranges, SEXP fields_R) |
|
211 |
+{ |
|
212 |
+ SEXP ans, chr_R, start_R, width_R, strand_R, anno_R; |
|
213 |
+ int nfields = fields_R == R_NilValue ? 1 : length(fields_R); |
|
214 |
+ int *fields; |
|
215 |
+ int nmatches = 0; |
|
216 |
+ IIT_T iit = matches.iit; |
|
217 |
+ |
|
218 |
+ for (int m = 0; m < matches.nqueries; m++) { |
|
219 |
+ nmatches += matches.nsubscripts[m]; |
|
220 |
+ } |
|
221 |
+ |
|
222 |
+ PROTECT(ans = allocVector(VECSXP, ANS_LENGTH)); |
|
223 |
+ if (ret_ranges) { |
|
224 |
+ chr_R = allocVector(STRSXP, nmatches); |
|
225 |
+ SET_VECTOR_ELT(ans, CHR, chr_R); |
|
226 |
+ start_R = allocVector(INTSXP, nmatches); |
|
227 |
+ SET_VECTOR_ELT(ans, START, start_R); |
|
228 |
+ width_R = allocVector(INTSXP, nmatches); |
|
229 |
+ SET_VECTOR_ELT(ans, WIDTH, width_R); |
|
230 |
+ strand_R = allocVector(INTSXP, nmatches); |
|
231 |
+ SET_VECTOR_ELT(ans, STRAND, strand_R); |
|
232 |
+ } |
|
233 |
+ anno_R = allocVector(VECSXP, nfields); |
|
234 |
+ SET_VECTOR_ELT(ans, ANNO, anno_R); |
|
235 |
+ |
|
236 |
+ for (int f = 0; f < nfields; f++) { |
|
237 |
+ SET_VECTOR_ELT(anno_R, f, allocVector(STRSXP, nmatches)); |
|
238 |
+ } |
|
239 |
+ |
|
240 |
+ if (fields_R != R_NilValue) { |
|
241 |
+ fields = (int *)R_alloc(sizeof(int), nfields); |
|
242 |
+ for (int f = 0; f < nfields; f++) { |
|
243 |
+ fields[f] = IIT_fieldint(iit, (char *)STRING_ELT(fields_R, f)); |
|
244 |
+ } |
|
245 |
+ } |
|
246 |
+ |
|
247 |
+ for (int i = 0; i < nmatches; i++) { |
|
248 |
+ if (ret_ranges) { |
|
249 |
+ Interval_T interval = IIT_interval(iit, i); |
|
250 |
+ SET_STRING_ELT(chr_R, i, mkChar(IIT_divstring_from_index(iit, i))); |
|
251 |
+ INTEGER(start_R)[i] = Interval_low(interval); |
|
252 |
+ INTEGER(width_R)[i] = Interval_length(interval); |
|
253 |
+ INTEGER(strand_R)[i] = Interval_sign(interval); |
|
254 |
+ } |
|
255 |
+ if (fields_R == R_NilValue) { |
|
256 |
+ char *restofheader; |
|
257 |
+ bool allocp; |
|
258 |
+ SET_STRING_ELT(anno_R, i, |
|
259 |
+ mkChar(IIT_annotation(&restofheader, iit, i, |
|
260 |
+ &allocp))); |
|
261 |
+ if (allocp == true) { |
|
262 |
+ free(restofheader); |
|
263 |
+ } |
|
264 |
+ } else { |
|
265 |
+ for (int f = 0; f < nfields; f++) { |
|
266 |
+ SET_STRING_ELT(VECTOR_ELT(anno_R, f), i, |
|
267 |
+ mkChar(IIT_fieldvalue(iit, i, fields[f]))); |
|
268 |
+ } |
|
269 |
+ } |
|
270 |
+ } |
|
271 |
+ |
|
272 |
+ UNPROTECT(1); |
|
273 |
+ return ans; |
|
274 |
+} |
|
275 |
+ |
|
276 |
+SEXP R_iit_read(SEXP iit_R, SEXP which_R, SEXP type_R, SEXP fields_R, |
|
277 |
+ SEXP ignore_strand_R, SEXP exact_R, SEXP ret_ranges_R) |
|
278 |
+{ |
|
279 |
+ IITMatches matches; |
|
280 |
+ IIT_T iit = R_ExternalPtrAddr(iit_R); |
|
281 |
+ bool ret_ranges = asLogical(ret_ranges_R); |
|
282 |
+ |
|
283 |
+ if (TYPEOF(which_R) == VECSXP) { |
|
284 |
+ matches = _iit_get_for_coords(iit, which_R, type_R, |
|
285 |
+ ignore_strand_R, exact_R); |
|
286 |
+ } else { |
|
287 |
+ matches = _iit_get_for_labels(iit, which_R); |
|
288 |
+ } |
|
289 |
+ |
|
290 |
+ return _convert_matches(matches, ret_ranges, fields_R); |
|
291 |
+} |
|
292 |
+ |
|
293 |
+SEXP R_iit_typeNames(SEXP iit_R) { |
|
294 |
+ IIT_T iit = R_ExternalPtrAddr(iit_R); |
|
295 |
+ SEXP ans; |
|
296 |
+ PROTECT(ans = allocVector(STRSXP, IIT_ntypes(iit))); |
|
297 |
+ for (int i = 0; i < IIT_ntypes(iit); i++) { |
|
298 |
+ SET_STRING_ELT(ans, i, mkChar(IIT_typestring(iit, i))); |
|
299 |
+ } |
|
300 |
+ UNPROTECT(1); |
|
301 |
+ return ans; |
|
302 |
+} |
|
303 |
+ |
|
304 |
+SEXP R_iit_fieldNames(SEXP iit_R) { |
|
305 |
+ IIT_T iit = R_ExternalPtrAddr(iit_R); |
|
306 |
+ SEXP ans; |
|
307 |
+ PROTECT(ans = allocVector(STRSXP, IIT_nfields(iit))); |
|
308 |
+ for (int i = 0; i < IIT_nfields(iit); i++) { |
|
309 |
+ SET_STRING_ELT(ans, i, mkChar(IIT_fieldstring(iit, i))); |
|
310 |
+ } |
|
311 |
+ UNPROTECT(1); |
|
312 |
+ return ans; |
|
313 |
+} |
|
314 |
+ |
|
315 |
+SEXP R_iit_length(SEXP iit_R) { |
|
316 |
+ IIT_T iit = R_ExternalPtrAddr(iit_R); |
|
317 |
+ return ScalarInteger(IIT_total_nintervals(iit)); |
|
318 |
+} |
|
319 |
+ |
|
25 | 320 |
/* 'IIT_output_direct' is not included in the libgstruct binary */ |
26 | 321 |
|
27 | 322 |
/* |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69806 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,37 @@ |
1 |
+#include <gstruct/iit-read.h> |
|
2 |
+ |
|
3 |
+#include "iit.h" |
|
4 |
+ |
|
5 |
+static void R_IIT_free(SEXP iit_R) { |
|
6 |
+ IIT_T iit = R_ExternalPtrAddr(iit_R); |
|
7 |
+ IIT_free(&iit); |
|
8 |
+} |
|
9 |
+ |
|
10 |
+SEXP R_IIT_new(IIT_T iit) { |
|
11 |
+ SEXP iit_R; |
|
12 |
+ iit_R = R_MakeExternalPtr((void *) iit, R_NilValue, R_NilValue); |
|
13 |
+ R_RegisterCFinalizer(iit_R, R_IIT_free); |
|
14 |
+ return iit_R; |
|
15 |
+} |
|
16 |
+ |
|
17 |
+SEXP R_iit_read(SEXP iitfile_R) { |
|
18 |
+ char *iitfile = (char *) CHAR(asChar(iitfile_R)); |
|
19 |
+ IIT_T iit = IIT_read(iitfile, /*name*/NULL, /*readonlyp*/true, |
|
20 |
+ /*divread*/READ_ALL, /*divstring*/NULL, |
|
21 |
+ /*add_iit_p*/false, /*labels_read_p*/true); |
|
22 |
+ return R_IIT_new(iit); |
|
23 |
+} |
|
24 |
+ |
|
25 |
+/* 'IIT_output_direct' is not included in the libgstruct binary */ |
|
26 |
+ |
|
27 |
+/* |
|
28 |
+ #include <gstruct/iit-write.h> |
|
29 |
+ |
|
30 |
+ SEXP R_iit_write(SEXP tally_iit_R, SEXP iitfile_R) { |
|
31 |
+ IIT_T tally_iit = (IIT_T) R_ExternalPtrAddr(tally_iit_R); |
|
32 |
+ char *iitfile = (char *) CHAR(asChar(iitfile_R)); |
|
33 |
+ IIT_output_direct(iitfile, tally_iit, IIT_LATEST_VERSION); |
|
34 |
+ return iitfile_R; |
|
35 |
+ } |
|
36 |
+*/ |
|
37 |
+ |