Browse code

start on C-level interface to IIT files

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@125153 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 14/12/2016 18:20:54
Showing 1 changed files
... ...
@@ -1,3 +1,4 @@
1
+#include <stdlib.h>
1 2
 #include <gstruct/iit-read.h>
2 3
 
3 4
 #include "iit.h"
... ...
@@ -14,14 +15,308 @@ SEXP R_IIT_new(IIT_T iit) {
14 15
   return iit_R;
15 16
 }
16 17
 
17
-SEXP R_iit_read(SEXP iitfile_R) {
18
+/*
19
+ IIT API:
20
+ * IIT_read
21
+ * IIT_annotation
22
+ * IIT_interval(iit, which), Interval_low, Interval_length, Interval_sign
23
+
24
+ Coordinate mapping (read divs, not labels):
25
+   Any overlap:
26
+   * IIT_get(&(*nmatches),*iit,*divstring,*coordstart,*coordend,sortp);
27
+   By strand:
28
+   * IIT_get_signed()
29
+   Exact coordinate match:
30
+   * IIT_get_exact_multiple()
31
+ 
32
+ Can restrict by "type" of feature:
33
+ * typeint = IIT_typeint(*iit,typestring)
34
+ * IIT_get_typed(&(*nmatches),*iit,*divstring,*coordstart,*coordend,typeint,
35
+                 sortp);
36
+ * IIT_get_typed_signed()
37
+
38
+ Restrict by label (read labels, not divs):
39
+ * IIT_find(&(*nmatches),iit,query);
40
+
41
+ Read standard annotation:
42
+ * field = IIT_annotation(&restofheader,iit,index,&allocp);
43
+ Or a specific field:
44
+ * fieldint = IIT_fieldint(iit,fieldstring)
45
+ * field = IIT_fieldvalue(iit,index,fieldint);
46
+
47
+ Get labels:
48
+ * IIT_label(iit, index, &allocp)
49
+ 
50
+ Introspection: 
51
+ Metadata:
52
+ * IIT_name
53
+ * IIT_version
54
+ * IIT_total_nintervals
55
+ Types:
56
+ * IIT_ntypes(iit)
57
+ * IIT_typestring (iit, typeint);
58
+ Fields:
59
+ * IIT_nfields(iit)
60
+ * IIT_fieldstring(iit, fieldint);
61
+*/
62
+
63
+typedef struct IITMatches {
64
+    IIT_T iit;
65
+    int **subscripts;
66
+    int *nsubscripts;
67
+    int nqueries;
68
+} IITMatches;
69
+
70
+static IITMatches _new_IITMatches(IIT_T iit, int nqueries) {
71
+    IITMatches matches;
72
+    matches.iit = iit;
73
+    matches.subscripts = (int **) R_alloc(sizeof(int *), nqueries);
74
+    matches.nsubscripts = (int *) R_alloc(sizeof(int), nqueries);
75
+    matches.nqueries = nqueries;
76
+    return matches;
77
+}
78
+
79
+SEXP R_iit_open(SEXP iitfile_R, SEXP divread_R, SEXP labels_read_R) {
18 80
   char *iitfile = (char *) CHAR(asChar(iitfile_R));
81
+  int divread = asLogical(divread_R) ? READ_ALL : READ_NONE;
82
+  bool labels_read = asLogical(labels_read_R);
19 83
   IIT_T iit = IIT_read(iitfile, /*name*/NULL, /*readonlyp*/true,
20
-                       /*divread*/READ_ALL, /*divstring*/NULL,
21
-                       /*add_iit_p*/false, /*labels_read_p*/true);
84
+                       divread, /*divstring*/NULL,
85
+                       /*add_iit_p*/false, labels_read);
22 86
   return R_IIT_new(iit);
23 87
 }
24 88
 
89
+static IITMatches _iit_find(IIT_T iit, SEXP which_R) {
90
+    IITMatches matches = _new_IITMatches(iit, length(which_R));
91
+    for (int i = 0; i < length(which_R); i++) {
92
+	matches.subscripts[i] =
93
+	    IIT_find(matches.nsubscripts + i, iit,
94
+		     (char *)CHAR(STRING_ELT(which_R, i)));
95
+    }
96
+    return matches;    
97
+}
98
+
99
+static IITMatches _iit_get_exact_multiple(IIT_T iit,
100
+					  SEXP chr_R, int *start, int *end,
101
+					  int type)
102
+{
103
+    IITMatches matches = _new_IITMatches(iit, length(chr_R));
104
+    for (int i = 0; i < length(chr_R); i++) {
105
+	matches.subscripts[i] =
106
+	    IIT_get_exact_multiple(matches.nsubscripts + i, iit,
107
+				   (char *)CHAR(STRING_ELT(chr_R, i)),
108
+				   start[i], end[i], type);
109
+    }
110
+    return matches;
111
+}
112
+
113
+static IITMatches _iit_get_typed(IIT_T iit,
114
+				 SEXP chr_R, int *start, int *end,
115
+				 int type)
116
+{
117
+    IITMatches matches = _new_IITMatches(iit, length(chr_R));
118
+    for (int i = 0; i < length(chr_R); i++) {
119
+	matches.subscripts[i] =
120
+	    IIT_get_typed(matches.nsubscripts + i, iit,
121
+			  (char *)CHAR(STRING_ELT(chr_R, i)),
122
+			  start[i], end[i], type,
123
+			  /*sortp*/false);
124
+    }
125
+    return matches;
126
+}
127
+
128
+static IITMatches _iit_get(IIT_T iit, SEXP chr_R, int *start, int *end)
129
+{
130
+    IITMatches matches = _new_IITMatches(iit, length(chr_R));
131
+    for (int i = 0; i < length(chr_R); i++) {
132
+	matches.subscripts[i] =
133
+	    IIT_get(matches.nsubscripts + i, iit,
134
+		    (char *)CHAR(STRING_ELT(chr_R, i)),
135
+		    start[i], end[i],
136
+		    /*sortp*/false);
137
+    }
138
+    return matches;
139
+}
140
+
141
+static IITMatches _iit_get_typed_signed(IIT_T iit,
142
+					SEXP chr_R, int *start, int *end,
143
+					int type, int *sign)
144
+{
145
+    IITMatches matches = _new_IITMatches(iit, length(chr_R));
146
+    for (int i = 0; i < length(chr_R); i++) {
147
+	matches.subscripts[i] =
148
+	    IIT_get_typed_signed(matches.nsubscripts + i, iit,
149
+				 (char *)CHAR(STRING_ELT(chr_R, i)),
150
+				 start[i], end[i], type, sign[i],
151
+				 /*sortp*/false);
152
+    }
153
+    return matches;
154
+}
155
+
156
+static IITMatches _iit_get_signed(IIT_T iit,
157
+				  SEXP chr_R, int *start, int *end,
158
+				  int *sign)
159
+{
160
+    IITMatches matches = _new_IITMatches(iit, length(chr_R));
161
+    for (int i = 0; i < length(chr_R); i++) {
162
+	matches.subscripts[i] =
163
+	    IIT_get_signed(matches.nsubscripts + i, iit,
164
+			   (char *)CHAR(STRING_ELT(chr_R, i)),
165
+			   start[i], end[i], sign[i],
166
+			   /*sortp*/false);
167
+    }
168
+    return matches;
169
+}
170
+
171
+static IITMatches _iit_get_for_coords(IIT_T iit, SEXP which_R, SEXP type_R,
172
+				      SEXP ignore_strand_R, SEXP exact_R)
173
+{
174
+    SEXP chr_R = VECTOR_ELT(which_R, 0);
175
+    int *start = INTEGER(VECTOR_ELT(which_R, 1));
176
+    int *end = INTEGER(VECTOR_ELT(which_R, 2));
177
+    int *sign = INTEGER(VECTOR_ELT(which_R, 3));
178
+    int type = type_R == R_NilValue ? 0 :
179
+	IIT_typeint(iit, (char *)CHAR(asChar(type_R)));
180
+    bool ignore_strand = asLogical(ignore_strand_R);
181
+    bool exact = asLogical(exact_R);
182
+    IITMatches matches;
183
+    
184
+    if (exact) {
185
+	/* sign filtering happens in R */
186
+	matches = _iit_get_exact_multiple(iit, chr_R, start, end, type);
187
+    } else if (ignore_strand) {
188
+	if (type > 0) {
189
+	    matches = _iit_get_typed(iit, chr_R, start, end, type);
190
+	} else {
191
+	    matches = _iit_get(iit, chr_R, start, end);
192
+	}
193
+    } else {
194
+	if (type > 0) {
195
+	    matches = _iit_get_typed_signed(iit, chr_R, start, end, type, sign);
196
+	} else {
197
+	    matches = _iit_get_signed(iit, chr_R, start, end, sign);
198
+	}
199
+    }
200
+    
201
+    return matches;
202
+}
203
+
204
+static IITMatches _iit_get_for_labels(IIT_T iit, SEXP which_R) {
205
+    return _iit_find(iit, which_R);
206
+}
207
+
208
+enum { CHR, START, WIDTH, STRAND, ANNO, ANS_LENGTH };
209
+
210
+static SEXP _convert_matches(IITMatches matches, bool ret_ranges, SEXP fields_R)
211
+{
212
+    SEXP ans, chr_R, start_R, width_R, strand_R, anno_R;
213
+    int nfields = fields_R == R_NilValue ? 1 : length(fields_R);
214
+    int *fields;
215
+    int nmatches = 0;
216
+    IIT_T iit = matches.iit;
217
+
218
+    for (int m = 0; m < matches.nqueries; m++) {
219
+	nmatches += matches.nsubscripts[m];
220
+    }
221
+
222
+    PROTECT(ans = allocVector(VECSXP, ANS_LENGTH));
223
+    if (ret_ranges) {
224
+	chr_R = allocVector(STRSXP, nmatches);
225
+	SET_VECTOR_ELT(ans, CHR, chr_R);
226
+	start_R = allocVector(INTSXP, nmatches);
227
+	SET_VECTOR_ELT(ans, START, start_R);
228
+	width_R = allocVector(INTSXP, nmatches);
229
+	SET_VECTOR_ELT(ans, WIDTH, width_R);
230
+	strand_R = allocVector(INTSXP, nmatches);
231
+	SET_VECTOR_ELT(ans, STRAND, strand_R);
232
+    }
233
+    anno_R = allocVector(VECSXP, nfields);
234
+    SET_VECTOR_ELT(ans, ANNO, anno_R);
235
+
236
+    for (int f = 0; f < nfields; f++) {
237
+	SET_VECTOR_ELT(anno_R, f, allocVector(STRSXP, nmatches));
238
+    }
239
+
240
+    if (fields_R != R_NilValue) {
241
+	fields = (int *)R_alloc(sizeof(int), nfields);
242
+	for (int f = 0; f < nfields; f++) {
243
+	    fields[f] = IIT_fieldint(iit, (char *)STRING_ELT(fields_R, f));
244
+	}
245
+    }
246
+    
247
+    for (int i = 0; i < nmatches; i++) {
248
+	if (ret_ranges) {
249
+	    Interval_T interval = IIT_interval(iit, i);
250
+	    SET_STRING_ELT(chr_R, i, mkChar(IIT_divstring_from_index(iit, i)));
251
+	    INTEGER(start_R)[i] = Interval_low(interval);
252
+	    INTEGER(width_R)[i] = Interval_length(interval);
253
+	    INTEGER(strand_R)[i] = Interval_sign(interval);
254
+	}
255
+	if (fields_R == R_NilValue) {
256
+	    char *restofheader;
257
+	    bool allocp;
258
+	    SET_STRING_ELT(anno_R, i,
259
+			   mkChar(IIT_annotation(&restofheader, iit, i,
260
+						 &allocp)));
261
+	    if (allocp == true) {
262
+		free(restofheader);
263
+	    }
264
+	} else {
265
+	    for (int f = 0; f < nfields; f++) {
266
+		SET_STRING_ELT(VECTOR_ELT(anno_R, f), i,
267
+			       mkChar(IIT_fieldvalue(iit, i, fields[f])));
268
+	    }
269
+	}
270
+    }
271
+
272
+    UNPROTECT(1);
273
+    return ans;
274
+}
275
+
276
+SEXP R_iit_read(SEXP iit_R, SEXP which_R, SEXP type_R, SEXP fields_R,
277
+		SEXP ignore_strand_R, SEXP exact_R, SEXP ret_ranges_R)
278
+{
279
+    IITMatches matches;
280
+    IIT_T iit = R_ExternalPtrAddr(iit_R);
281
+    bool ret_ranges = asLogical(ret_ranges_R);
282
+
283
+    if (TYPEOF(which_R) == VECSXP) {
284
+	matches = _iit_get_for_coords(iit, which_R, type_R,
285
+				      ignore_strand_R, exact_R);
286
+    } else {
287
+	matches = _iit_get_for_labels(iit, which_R);
288
+    }
289
+
290
+    return _convert_matches(matches, ret_ranges, fields_R);
291
+}
292
+
293
+SEXP R_iit_typeNames(SEXP iit_R) {
294
+    IIT_T iit = R_ExternalPtrAddr(iit_R);
295
+    SEXP ans;
296
+    PROTECT(ans = allocVector(STRSXP, IIT_ntypes(iit)));
297
+    for (int i = 0; i < IIT_ntypes(iit); i++) {
298
+	SET_STRING_ELT(ans, i, mkChar(IIT_typestring(iit, i)));
299
+    }
300
+    UNPROTECT(1);
301
+    return ans;
302
+}
303
+
304
+SEXP R_iit_fieldNames(SEXP iit_R) {
305
+    IIT_T iit = R_ExternalPtrAddr(iit_R);
306
+    SEXP ans;
307
+    PROTECT(ans = allocVector(STRSXP, IIT_nfields(iit)));
308
+    for (int i = 0; i < IIT_nfields(iit); i++) {
309
+	SET_STRING_ELT(ans, i, mkChar(IIT_fieldstring(iit, i)));
310
+    }
311
+    UNPROTECT(1);
312
+    return ans;
313
+}
314
+
315
+SEXP R_iit_length(SEXP iit_R) {
316
+    IIT_T iit = R_ExternalPtrAddr(iit_R);
317
+    return ScalarInteger(IIT_total_nintervals(iit));
318
+}
319
+
25 320
 /* 'IIT_output_direct' is not included in the libgstruct binary */
26 321
 
27 322
 /*
Browse code

Decoupling the bam tallying from summarization. Soon, we will return an IIT, which can be summarized in various ways. Currently, the only way is the variant summary.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/gmapR@69806 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 25/09/2012 16:00:17
Showing 1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,37 @@
1
+#include <gstruct/iit-read.h>
2
+
3
+#include "iit.h"
4
+
5
+static void R_IIT_free(SEXP iit_R) {
6
+  IIT_T iit = R_ExternalPtrAddr(iit_R);
7
+  IIT_free(&iit);
8
+}
9
+
10
+SEXP R_IIT_new(IIT_T iit) {
11
+  SEXP iit_R;
12
+  iit_R = R_MakeExternalPtr((void *) iit, R_NilValue, R_NilValue);
13
+  R_RegisterCFinalizer(iit_R, R_IIT_free);
14
+  return iit_R;
15
+}
16
+
17
+SEXP R_iit_read(SEXP iitfile_R) {
18
+  char *iitfile = (char *) CHAR(asChar(iitfile_R));
19
+  IIT_T iit = IIT_read(iitfile, /*name*/NULL, /*readonlyp*/true,
20
+                       /*divread*/READ_ALL, /*divstring*/NULL,
21
+                       /*add_iit_p*/false, /*labels_read_p*/true);
22
+  return R_IIT_new(iit);
23
+}
24
+
25
+/* 'IIT_output_direct' is not included in the libgstruct binary */
26
+
27
+/*
28
+  #include <gstruct/iit-write.h>
29
+
30
+  SEXP R_iit_write(SEXP tally_iit_R, SEXP iitfile_R) {
31
+  IIT_T tally_iit = (IIT_T) R_ExternalPtrAddr(tally_iit_R);
32
+  char *iitfile = (char *) CHAR(asChar(iitfile_R));
33
+  IIT_output_direct(iitfile, tally_iit, IIT_LATEST_VERSION);
34
+  return iitfile_R;
35
+  }
36
+*/
37
+