32a9b516 |
/*
IIT API:
* IIT_read
* IIT_annotation
* IIT_interval(iit, which), Interval_low, Interval_length, Interval_sign
Coordinate mapping (read divs, not labels):
Any overlap:
* IIT_get(&(*nmatches),*iit,*divstring,*coordstart,*coordend,sortp);
By strand:
* IIT_get_signed()
Exact coordinate match:
* IIT_get_exact_multiple()
Can restrict by "type" of feature:
* typeint = IIT_typeint(*iit,typestring)
* IIT_get_typed(&(*nmatches),*iit,*divstring,*coordstart,*coordend,typeint,
sortp);
* IIT_get_typed_signed()
Restrict by label (read labels, not divs):
* IIT_find(&(*nmatches),iit,query);
Read standard annotation:
* field = IIT_annotation(&restofheader,iit,index,&allocp);
Or a specific field:
* fieldint = IIT_fieldint(iit,fieldstring)
* field = IIT_fieldvalue(iit,index,fieldint);
Get labels:
* IIT_label(iit, index, &allocp)
Introspection:
Metadata:
* IIT_name
* IIT_version
* IIT_total_nintervals
Types:
* IIT_ntypes(iit)
* IIT_typestring (iit, typeint);
Fields:
* IIT_nfields(iit)
* IIT_fieldstring(iit, fieldint);
*/
/*
 * Result of a batch of IIT lookups: one array of hit subscripts per query,
 * together with the IIT the lookups were run against.
 */
struct IITMatches {
  IIT_T iit;         /* IIT that was queried */
  int **subscripts;  /* subscripts[q]: array returned by the lookup for query q */
  int *nsubscripts;  /* nsubscripts[q]: hit count reported for query q */
  int nqueries;      /* number of queries, i.e. length of both arrays */
};
typedef struct IITMatches IITMatches;
/*
 * Build an IITMatches holder with room for `nqueries` per-query results.
 *
 * iit       IIT the queries will be run against (stored, not copied).
 * nqueries  number of queries; sizes both per-query arrays.
 *
 * The arrays come from R_alloc(), so they are managed by R and must not be
 * free()d.  Their contents are left uninitialised; callers are expected to
 * fill every slot.
 */
static IITMatches _new_IITMatches(IIT_T iit, int nqueries) {
  IITMatches matches;
  matches.iit = iit;
  matches.nqueries = nqueries;
  /* R_alloc takes (element count, element size), in that order. */
  matches.subscripts = (int **) R_alloc(nqueries, sizeof(int *));
  matches.nsubscripts = (int *) R_alloc(nqueries, sizeof(int));
  return matches;
}
SEXP R_iit_open(SEXP iitfile_R, SEXP divread_R, SEXP labels_read_R) {
|
32a9b516 |
/*
 * Run IIT_find() once for every element of the character vector which_R.
 * Slot q of the result holds the subscript array and hit count for
 * which_R[q].
 */
static IITMatches _iit_find(IIT_T iit, SEXP which_R) {
  const int nlabels = length(which_R);
  IITMatches result = _new_IITMatches(iit, nlabels);

  for (int q = 0; q < nlabels; q++) {
    char *label = (char *) CHAR(STRING_ELT(which_R, q));
    int *nhits = &result.nsubscripts[q];
    result.subscripts[q] = IIT_find(nhits, iit, label);
  }
  return result;
}
/*
 * Run IIT_get_exact_multiple() once per query.  Query q is described by
 * chr_R[q], start[q] and end[q]; `type` is passed through unchanged to
 * every call.
 */
static IITMatches _iit_get_exact_multiple(IIT_T iit,
                                          SEXP chr_R, int *start, int *end,
                                          int type)
{
  const int nqueries = length(chr_R);
  IITMatches result = _new_IITMatches(iit, nqueries);

  for (int q = 0; q < nqueries; q++) {
    char *div = (char *) CHAR(STRING_ELT(chr_R, q));
    int *nhits = &result.nsubscripts[q];
    result.subscripts[q] =
      IIT_get_exact_multiple(nhits, iit, div, start[q], end[q], type);
  }
  return result;
}
/*
 * Run IIT_get_typed() once per query (chr_R[q], start[q], end[q]) with the
 * given type code.  Sorting of the returned subscripts is not requested.
 */
static IITMatches _iit_get_typed(IIT_T iit,
                                 SEXP chr_R, int *start, int *end,
                                 int type)
{
  const int nqueries = length(chr_R);
  IITMatches result = _new_IITMatches(iit, nqueries);

  for (int q = 0; q < nqueries; q++) {
    char *div = (char *) CHAR(STRING_ELT(chr_R, q));
    int *nhits = &result.nsubscripts[q];
    result.subscripts[q] =
      IIT_get_typed(nhits, iit, div, start[q], end[q], type,
                    /*sortp*/false);
  }
  return result;
}
/*
 * Run IIT_get() once per query (chr_R[q], start[q], end[q]).  Sorting of
 * the returned subscripts is not requested.
 */
static IITMatches _iit_get(IIT_T iit, SEXP chr_R, int *start, int *end)
{
  const int nqueries = length(chr_R);
  IITMatches result = _new_IITMatches(iit, nqueries);

  for (int q = 0; q < nqueries; q++) {
    char *div = (char *) CHAR(STRING_ELT(chr_R, q));
    int *nhits = &result.nsubscripts[q];
    result.subscripts[q] =
      IIT_get(nhits, iit, div, start[q], end[q], /*sortp*/false);
  }
  return result;
}
/*
 * Run IIT_get_typed_signed() once per query.  Query q is described by
 * chr_R[q], start[q], end[q] and sign[q]; `type` is shared by all queries.
 * Sorting of the returned subscripts is not requested.
 */
static IITMatches _iit_get_typed_signed(IIT_T iit,
                                        SEXP chr_R, int *start, int *end,
                                        int type, int *sign)
{
  const int nqueries = length(chr_R);
  IITMatches result = _new_IITMatches(iit, nqueries);

  for (int q = 0; q < nqueries; q++) {
    char *div = (char *) CHAR(STRING_ELT(chr_R, q));
    int *nhits = &result.nsubscripts[q];
    result.subscripts[q] =
      IIT_get_typed_signed(nhits, iit, div, start[q], end[q], type, sign[q],
                           /*sortp*/false);
  }
  return result;
}
/*
 * Run IIT_get_signed() once per query (chr_R[q], start[q], end[q],
 * sign[q]).  Sorting of the returned subscripts is not requested.
 */
static IITMatches _iit_get_signed(IIT_T iit,
                                  SEXP chr_R, int *start, int *end,
                                  int *sign)
{
  const int nqueries = length(chr_R);
  IITMatches result = _new_IITMatches(iit, nqueries);

  for (int q = 0; q < nqueries; q++) {
    char *div = (char *) CHAR(STRING_ELT(chr_R, q));
    int *nhits = &result.nsubscripts[q];
    result.subscripts[q] =
      IIT_get_signed(nhits, iit, div, start[q], end[q], sign[q],
                     /*sortp*/false);
  }
  return result;
}
/*
 * Coordinate-based lookup: pick the IIT query routine that matches the
 * requested options and run it over every query in which_R.
 *
 * which_R          list; elements 0..3 are read as the div names (strings),
 *                  starts, ends and signs (integer vectors) respectively.
 * type_R           R_NilValue for "no type restriction", otherwise a value
 *                  converted with asChar() and resolved via IIT_typeint().
 * ignore_strand_R  logical; when true the sign vector is not used.
 * exact_R          logical; when true IIT_get_exact_multiple() is used and
 *                  takes precedence over ignore_strand_R.
 */
static IITMatches _iit_get_for_coords(IIT_T iit, SEXP which_R, SEXP type_R,
                                      SEXP ignore_strand_R, SEXP exact_R)
{
  SEXP chr_R = VECTOR_ELT(which_R, 0);
  int *start = INTEGER(VECTOR_ELT(which_R, 1));
  int *end = INTEGER(VECTOR_ELT(which_R, 2));
  int *sign = INTEGER(VECTOR_ELT(which_R, 3));

  int type = 0;
  if (type_R != R_NilValue) {
    type = IIT_typeint(iit, (char *)CHAR(asChar(type_R)));
  }

  bool ignore_strand = asLogical(ignore_strand_R);
  bool exact = asLogical(exact_R);

  if (exact) {
    /* sign filtering happens in R */
    return _iit_get_exact_multiple(iit, chr_R, start, end, type);
  }

  /* Only a positive type code selects the typed variants. */
  if (ignore_strand) {
    if (type > 0) {
      return _iit_get_typed(iit, chr_R, start, end, type);
    }
    return _iit_get(iit, chr_R, start, end);
  }

  if (type > 0) {
    return _iit_get_typed_signed(iit, chr_R, start, end, type, sign);
  }
  return _iit_get_signed(iit, chr_R, start, end, sign);
}
/*
 * Label-based lookup: which_R is a character vector of labels, each of
 * which is resolved through _iit_find().
 */
static IITMatches _iit_get_for_labels(IIT_T iit, SEXP which_R) {
  IITMatches by_label = _iit_find(iit, which_R);
  return by_label;
}
/* Slot positions in the result list built by _convert_matches(). */
enum {
  CHR = 0,        /* div (chromosome) names */
  START = 1,      /* interval starts */
  WIDTH = 2,      /* interval lengths */
  STRAND = 3,     /* interval signs */
  ANNO = 4,       /* list of annotation columns */
  ANS_LENGTH = 5  /* number of slots */
};
/*
 * Turn a set of IIT lookup results into an R list of length ANS_LENGTH.
 *
 * matches     per-query subscript arrays and counts, plus the IIT queried.
 * ret_ranges  when true, slots CHR/START/WIDTH/STRAND are filled with one
 *             entry per hit; otherwise those slots are left as allocated
 *             by allocVector (untouched here).
 * fields_R    R_NilValue to return the standard annotation as a single
 *             column, or a character vector of field names to return one
 *             column per field.
 *
 * Slot ANNO is always a list of character vectors, one per column, each
 * with one entry per hit.  Hits are emitted query by query, in the order
 * they appear in each query's subscript array.
 *
 * NOTE(review): the subscript arrays in `matches` are not released anywhere
 * in the visible code -- confirm who owns the memory returned by the IIT
 * lookup functions.
 */
static SEXP _convert_matches(IITMatches matches, bool ret_ranges, SEXP fields_R)
{
  SEXP ans, anno_R;
  SEXP chr_R = R_NilValue, start_R = R_NilValue;
  SEXP width_R = R_NilValue, strand_R = R_NilValue;
  int nfields = fields_R == R_NilValue ? 1 : length(fields_R);
  int *fields = NULL;
  int nmatches = 0;
  IIT_T iit = matches.iit;

  /* Total number of hits across all queries = number of output rows. */
  for (int m = 0; m < matches.nqueries; m++) {
    nmatches += matches.nsubscripts[m];
  }

  PROTECT(ans = allocVector(VECSXP, ANS_LENGTH));

  /* Each vector is stored into `ans` right after allocation, so it is
     reachable from a protected object before the next allocation. */
  if (ret_ranges) {
    chr_R = allocVector(STRSXP, nmatches);
    SET_VECTOR_ELT(ans, CHR, chr_R);
    start_R = allocVector(INTSXP, nmatches);
    SET_VECTOR_ELT(ans, START, start_R);
    width_R = allocVector(INTSXP, nmatches);
    SET_VECTOR_ELT(ans, WIDTH, width_R);
    strand_R = allocVector(INTSXP, nmatches);
    SET_VECTOR_ELT(ans, STRAND, strand_R);
  }

  anno_R = allocVector(VECSXP, nfields);
  SET_VECTOR_ELT(ans, ANNO, anno_R);
  for (int f = 0; f < nfields; f++) {
    SET_VECTOR_ELT(anno_R, f, allocVector(STRSXP, nmatches));
  }

  if (fields_R != R_NilValue) {
    fields = (int *)R_alloc(nfields, sizeof(int));
    for (int f = 0; f < nfields; f++) {
      /* STRING_ELT yields a CHARSXP handle; CHAR() gives its C string. */
      fields[f] = IIT_fieldint(iit, (char *)CHAR(STRING_ELT(fields_R, f)));
    }
  }

  /* `row` is the position in the output vectors; `index` is the IIT
     subscript reported by the lookup and is what the IIT accessors take. */
  int row = 0;
  for (int m = 0; m < matches.nqueries; m++) {
    const int *subscripts = matches.subscripts[m];
    for (int j = 0; j < matches.nsubscripts[m]; j++, row++) {
      int index = subscripts[j];

      if (ret_ranges) {
        Interval_T interval = IIT_interval(iit, index);
        SET_STRING_ELT(chr_R, row,
                       mkChar(IIT_divstring_from_index(iit, index)));
        INTEGER(start_R)[row] = Interval_low(interval);
        INTEGER(width_R)[row] = Interval_length(interval);
        INTEGER(strand_R)[row] = Interval_sign(interval);
      }

      if (fields_R == R_NilValue) {
        char *restofheader;
        bool allocp;
        /* anno_R is a list; the single annotation column is element 0. */
        SET_STRING_ELT(VECTOR_ELT(anno_R, 0), row,
                       mkChar(IIT_annotation(&restofheader, iit, index,
                                             &allocp)));
        if (allocp == true) {
          free(restofheader);
        }
      } else {
        for (int f = 0; f < nfields; f++) {
          SET_STRING_ELT(VECTOR_ELT(anno_R, f), row,
                         mkChar(IIT_fieldvalue(iit, index, fields[f])));
        }
      }
    }
  }

  UNPROTECT(1);
  return ans;
}
/*
 * .Call entry point: query an IIT and return the hits as an R list.
 *
 * iit_R            external pointer wrapping the IIT handle.
 * which_R          a list (VECSXP) selects a coordinate-based lookup; any
 *                  other type is handed to the label-based lookup.
 * type_R, ignore_strand_R, exact_R
 *                  forwarded to the coordinate lookup only.
 * fields_R         forwarded to _convert_matches() to choose the annotation
 *                  columns.
 * ret_ranges_R     logical; whether range columns are filled in.
 */
SEXP R_iit_read(SEXP iit_R, SEXP which_R, SEXP type_R, SEXP fields_R,
                SEXP ignore_strand_R, SEXP exact_R, SEXP ret_ranges_R)
{
  IIT_T iit = R_ExternalPtrAddr(iit_R);
  bool ret_ranges = asLogical(ret_ranges_R);
  bool by_coords = TYPEOF(which_R) == VECSXP;

  IITMatches matches = by_coords
    ? _iit_get_for_coords(iit, which_R, type_R, ignore_strand_R, exact_R)
    : _iit_get_for_labels(iit, which_R);

  return _convert_matches(matches, ret_ranges, fields_R);
}
/*
 * .Call entry point: return the IIT's type strings as a character vector,
 * one element per type code from 0 to IIT_ntypes(iit) - 1.
 */
SEXP R_iit_typeNames(SEXP iit_R) {
  IIT_T iit = R_ExternalPtrAddr(iit_R);
  const int ntypes = IIT_ntypes(iit);
  SEXP names = PROTECT(allocVector(STRSXP, ntypes));

  for (int t = 0; t < ntypes; t++) {
    SET_STRING_ELT(names, t, mkChar(IIT_typestring(iit, t)));
  }

  UNPROTECT(1);
  return names;
}
/*
 * .Call entry point: return the IIT's field names as a character vector,
 * one element per field code from 0 to IIT_nfields(iit) - 1.
 */
SEXP R_iit_fieldNames(SEXP iit_R) {
  IIT_T iit = R_ExternalPtrAddr(iit_R);
  const int nfields = IIT_nfields(iit);
  SEXP names = PROTECT(allocVector(STRSXP, nfields));

  for (int f = 0; f < nfields; f++) {
    SET_STRING_ELT(names, f, mkChar(IIT_fieldstring(iit, f)));
  }

  UNPROTECT(1);
  return names;
}
/*
 * .Call entry point: return IIT_total_nintervals() for the IIT as a
 * length-one integer vector.
 */
SEXP R_iit_length(SEXP iit_R) {
  IIT_T iit = R_ExternalPtrAddr(iit_R);
  int nintervals = IIT_total_nintervals(iit);
  return ScalarInteger(nintervals);
}
|