685d60e3 |
#define DEBUG2 0
|
c78be696 |
|
3f899953 |
#include <stdlib.h> /* for abs */
#include <string.h> /* for strlen */
#include "iit.h"
#include "bytestream.h"
|
685d60e3 |
enum { SEQNAMES, POS, REF, READ, N_CYCLES, N_CYCLES_REF, COUNT, COUNT_REF,
|
5659f096 |
RAW_COUNT_TOTAL, COUNT_TOTAL, COUNT_PLUS, COUNT_PLUS_REF, COUNT_MINUS,
|
685d60e3 |
COUNT_MINUS_REF, DELCOUNT_PLUS, DELCOUNT_MINUS,
READ_POS_MEAN, READ_POS_MEAN_REF, READ_POS_VAR,
READ_POS_VAR_REF, MDFNE, MDFNE_REF, CODON_STRAND,
COUNT_XS_PLUS, COUNT_XS_PLUS_REF,
COUNT_XS_MINUS, COUNT_XS_MINUS_REF,
COUNT_HIGH_NM, COUNT_HIGH_NM_REF, N_BASE_COLS };
|
38518ddb |
enum { STRAND_PLUS = 1,
STRAND_MINUS,
STRAND_NONE };
|
c78be696 |
static char *codon_table[64] =
|
e01ded3c |
{"AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT",
"AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT",
"CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT",
"CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT",
"GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT",
"GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT",
"TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT",
"TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT"};
|
c78be696 |
static int NUM_PTRS = 5;
|
3f899953 |
typedef struct TallyTable {
SEXP seqnames_R;
int *pos;
SEXP ref_R;
SEXP read_R;
|
96cf9938 |
int *n_cycles;
int *n_cycles_ref;
|
3f899953 |
int *count;
int *count_ref;
|
5659f096 |
int *raw_count_total;
|
3f899953 |
int *count_total;
int *count_plus;
int *count_plus_ref;
int *count_minus;
int *count_minus_ref;
|
8418591e |
int *delcount_plus;
int *delcount_minus;
|
ae7cf39f |
double *read_pos_mean;
double *read_pos_mean_ref;
double *read_pos_var;
double *read_pos_var_ref;
|
7c263469 |
double *mdfne;
double *mdfne_ref;
|
8418591e |
int *count_xs_plus;
int *count_xs_plus_ref;
int *count_xs_minus;
int *count_xs_minus_ref;
|
685d60e3 |
int *count_high_nm;
int *count_high_nm_ref;
|
c78be696 |
int *strand;
|
3f899953 |
int **cycle_bins;
} TallyTable;
|
7c263469 |
typedef struct TallyParam {
int *cycle_breaks;
int n_cycle_bins;
int read_length;
double *mdfne_buf;
|
8418591e |
bool xs;
|
685d60e3 |
int high_nm_score;
|
7c263469 |
} TallyParam;
|
8418591e |
static SEXP R_TallyTable_new(int n_rows, int n_cycle_bins, bool xs) {
|
3f899953 |
SEXP tally_R; /* the result list */
PROTECT(tally_R = allocVector(VECSXP, N_BASE_COLS + n_cycle_bins));
SET_VECTOR_ELT(tally_R, SEQNAMES, allocVector(STRSXP, n_rows));
SET_VECTOR_ELT(tally_R, POS, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, REF, allocVector(STRSXP, n_rows));
SET_VECTOR_ELT(tally_R, READ, allocVector(STRSXP, n_rows));
|
96cf9938 |
SET_VECTOR_ELT(tally_R, N_CYCLES, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, N_CYCLES_REF, allocVector(INTSXP, n_rows));
|
3f899953 |
SET_VECTOR_ELT(tally_R, COUNT, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, COUNT_REF, allocVector(INTSXP, n_rows));
|
5659f096 |
SET_VECTOR_ELT(tally_R, RAW_COUNT_TOTAL, allocVector(INTSXP, n_rows));
|
3f899953 |
SET_VECTOR_ELT(tally_R, COUNT_TOTAL, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, COUNT_PLUS, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, COUNT_PLUS_REF, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, COUNT_MINUS, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, COUNT_MINUS_REF, allocVector(INTSXP, n_rows));
|
ae7cf39f |
SET_VECTOR_ELT(tally_R, READ_POS_MEAN, allocVector(REALSXP, n_rows));
SET_VECTOR_ELT(tally_R, READ_POS_MEAN_REF, allocVector(REALSXP, n_rows));
SET_VECTOR_ELT(tally_R, READ_POS_VAR, allocVector(REALSXP, n_rows));
SET_VECTOR_ELT(tally_R, READ_POS_VAR_REF, allocVector(REALSXP, n_rows));
|
7c263469 |
SET_VECTOR_ELT(tally_R, MDFNE, allocVector(REALSXP, n_rows));
SET_VECTOR_ELT(tally_R, MDFNE_REF, allocVector(REALSXP, n_rows));
|
c78be696 |
SET_VECTOR_ELT(tally_R, CODON_STRAND, allocVector(INTSXP, n_rows));
|
8418591e |
SET_VECTOR_ELT(tally_R, DELCOUNT_PLUS, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, DELCOUNT_MINUS, allocVector(INTSXP, n_rows));
|
685d60e3 |
if (xs) {
SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, COUNT_XS_PLUS_REF, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, COUNT_XS_MINUS_REF, allocVector(INTSXP, n_rows));
}
SET_VECTOR_ELT(tally_R, COUNT_HIGH_NM, allocVector(INTSXP, n_rows));
SET_VECTOR_ELT(tally_R, COUNT_HIGH_NM_REF, allocVector(INTSXP, n_rows));
|
3f899953 |
for (int bin = 0; bin < n_cycle_bins; bin++) {
SEXP cycle_bin_R = allocVector(INTSXP, n_rows);
SET_VECTOR_ELT(tally_R, bin + N_BASE_COLS, cycle_bin_R);
}
UNPROTECT(1);
return tally_R;
}
|
8418591e |
static TallyTable *TallyTable_new(SEXP tally_R, bool xs) {
|
685d60e3 |
TallyTable *tally = (TallyTable *) S_alloc(sizeof(TallyTable), 1);
|
3f899953 |
int n_cycle_bins = length(tally_R) - N_BASE_COLS;
tally->seqnames_R = VECTOR_ELT(tally_R, SEQNAMES);
tally->pos = INTEGER(VECTOR_ELT(tally_R, POS));
tally->ref_R = VECTOR_ELT(tally_R, REF);
tally->read_R = VECTOR_ELT(tally_R, READ);
|
96cf9938 |
tally->n_cycles = INTEGER(VECTOR_ELT(tally_R, N_CYCLES));
tally->n_cycles_ref = INTEGER(VECTOR_ELT(tally_R, N_CYCLES_REF));
|
3f899953 |
tally->count = INTEGER(VECTOR_ELT(tally_R, COUNT));
tally->count_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_REF));
|
5659f096 |
tally->raw_count_total = INTEGER(VECTOR_ELT(tally_R, RAW_COUNT_TOTAL));
|
3f899953 |
tally->count_total = INTEGER(VECTOR_ELT(tally_R, COUNT_TOTAL));
tally->count_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS));
tally->count_plus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_PLUS_REF));
tally->count_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_MINUS));
tally->count_minus_ref = INTEGER(VECTOR_ELT(tally_R, COUNT_MINUS_REF));
|
ae7cf39f |
tally->read_pos_mean = REAL(VECTOR_ELT(tally_R, READ_POS_MEAN));
tally->read_pos_mean_ref = REAL(VECTOR_ELT(tally_R, READ_POS_MEAN_REF));
tally->read_pos_var = REAL(VECTOR_ELT(tally_R, READ_POS_VAR));
tally->read_pos_var_ref = REAL(VECTOR_ELT(tally_R, READ_POS_VAR_REF));
|
7c263469 |
tally->mdfne = REAL(VECTOR_ELT(tally_R, MDFNE));
tally->mdfne_ref = REAL(VECTOR_ELT(tally_R, MDFNE_REF));
|
c78be696 |
tally->strand = INTEGER(VECTOR_ELT(tally_R, CODON_STRAND));
|
8418591e |
tally->delcount_plus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_PLUS));
tally->delcount_minus = INTEGER(VECTOR_ELT(tally_R, DELCOUNT_MINUS));
|
3f899953 |
tally->cycle_bins = (int **) R_alloc(sizeof(int*), n_cycle_bins);
|
685d60e3 |
if (xs) {
tally->count_xs_plus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_PLUS));
tally->count_xs_plus_ref = INTEGER(VECTOR_ELT(tally_R,
COUNT_XS_PLUS_REF));
tally->count_xs_minus = INTEGER(VECTOR_ELT(tally_R, COUNT_XS_MINUS));
tally->count_xs_minus_ref = INTEGER(VECTOR_ELT(tally_R,
COUNT_XS_MINUS_REF));
}
tally->count_high_nm = INTEGER(VECTOR_ELT(tally_R, COUNT_HIGH_NM));
tally->count_high_nm_ref = INTEGER(VECTOR_ELT(tally_R,
COUNT_HIGH_NM_REF));
|
8418591e |
|
3f899953 |
for (int bin = 0; bin < n_cycle_bins; bin++) {
tally->cycle_bins[bin] = INTEGER(VECTOR_ELT(tally_R, bin + N_BASE_COLS));
}
|
c78be696 |
|
3f899953 |
return tally;
}
static void
|
5659f096 |
read_total_counts(unsigned char **bytes, int row, int *raw_count_total,
int *count_total)
{
raw_count_total[row] = read_int(bytes);
|
3f899953 |
int count_total_plus = read_int(bytes);
int count_total_minus = read_int(bytes);
count_total[row] = count_total_plus + count_total_minus;
}
|
685d60e3 |
static void
read_nm_counts(unsigned char **bytes, int row, int *count_high_nm,
int high_nm_score)
{
int n_nm = read_int(bytes);
count_high_nm[row] = 0;
/* FIXME: currently bam_tally gives us counts whether we want them or not */
for (int index = 0; index < n_nm; index++) {
int nm = read_int(bytes);
int count = read_int(bytes);
if (nm >= high_nm_score) {
count_high_nm[row] += count;
}
}
}
|
8418591e |
static void
read_xs_counts(unsigned char **bytes, int row, int *count_xs_plus,
int *count_xs_minus)
{
int n_xs = read_int(bytes);
|
12074fea |
if (count_xs_plus != NULL) {
count_xs_plus[row] = 0;
count_xs_minus[row] = 0;
|
685d60e3 |
}
|
8418591e |
for (int index = 0; index < n_xs; index++) {
int xs = read_int(bytes);
int count = read_int(bytes);
|
12074fea |
if (count_xs_plus != NULL) {
if (xs == 1) {
count_xs_plus[row] = count;
} else if (xs == 2) {
count_xs_minus[row] = count;
}
}
|
8418591e |
}
}
|
3f899953 |
static void
|
7c263469 |
read_cycle_counts(unsigned char **bytes, int row, TallyParam param,
|
96cf9938 |
int *n_cycles, double *read_pos_mean, double *read_pos_var,
|
7c263469 |
double *mdfne, int **cycle_bins)
|
3f899953 |
{
|
7c263469 |
int n_cycle_breaks = param.n_cycle_bins + 1;
|
ae7cf39f |
int count_sum = 0, weighted_sum = 0, weighted_sum_sq = 0;
|
7c263469 |
int midpoint = param.read_length / 2.0 + 0.5;
if (param.mdfne_buf != NULL) {
memset(param.mdfne_buf, 0, sizeof(int) * midpoint);
}
|
96cf9938 |
n_cycles[row] = read_int(bytes);
|
7c263469 |
|
96cf9938 |
for (int index = 0; index < n_cycles[row]; index++) {
|
97dc0c28 |
int cycle = abs(read_int(bytes));
int count = read_int(bytes);
int bin = 0;
|
7c263469 |
if (param.mdfne_buf != NULL && cycle <= param.read_length) {
param.mdfne_buf[midpoint-abs(cycle-midpoint)-1] = count;
}
|
97dc0c28 |
count_sum += count;
|
ae7cf39f |
weighted_sum += cycle * count;
weighted_sum_sq += cycle * cycle * count;
|
96cf9938 |
if (param.n_cycle_bins > 0) {
|
3f899953 |
while(n_cycle_breaks > bin &&
|
7c263469 |
cycle > param.cycle_breaks[bin])
|
3f899953 |
bin++;
if (bin > 0 && bin < n_cycle_breaks) {
cycle_bins[bin-1][row] += count;
}
}
|
97dc0c28 |
}
|
ae7cf39f |
read_pos_mean[row] = ((double)weighted_sum) / count_sum;
if (count_sum > 1) {
read_pos_var[row] = ((double)weighted_sum_sq) / (count_sum - 1) -
(count_sum / (count_sum - 1)) * read_pos_mean[row] * read_pos_mean[row];
} else {
read_pos_var[row] = NA_REAL;
}
|
7c263469 |
mdfne[row] = NA_REAL;
if (param.mdfne_buf != NULL) {
int prev_dist = 0;
int total_count = count_sum;
count_sum = 0;
for (int dist = 0; dist < midpoint; dist++) {
count_sum += param.mdfne_buf[dist];
if (count_sum > total_count / 2) {
if (total_count % 2 == 0) {
mdfne[row] = prev_dist + (dist - prev_dist) / 2.0;
} else {
mdfne[row] = dist;
}
break;
}
if (param.mdfne_buf[dist] > 0) {
prev_dist = dist;
}
}
}
|
3f899953 |
}
static int
parse_indels(unsigned char *bytes, int row,
|
7c263469 |
TallyParam param, TallyTable *tally, bool insertion)
|
3f899953 |
{
int indel_count = read_int(&bytes);
|
8c14ead5 |
for (int indel = 0; indel < indel_count; indel++, row++) {
|
7c263469 |
for (int b = 0; b < param.n_cycle_bins; b++) {
|
8c14ead5 |
tally->cycle_bins[b][row] = 0;
}
|
3f899953 |
tally->count_plus[row] = read_int(&bytes);
tally->count_minus[row] = read_int(&bytes);
tally->count[row] = tally->count_plus[row] + tally->count_minus[row];
|
97dc0c28 |
tally->count_plus_ref[row] = read_int(&bytes);
tally->count_minus_ref[row] = read_int(&bytes);
tally->count_ref[row] = tally->count_plus_ref[row] +
|
8418591e |
tally->count_minus_ref[row];
|
5659f096 |
tally->raw_count_total[row] = tally->count_ref[row] + tally->count[row];
tally->count_total[row] = tally->raw_count_total[row];
|
38518ddb |
tally->strand[row] = STRAND_NONE;
|
3f899953 |
SEXP seq_R = mkChar(read_string(&bytes));
if (insertion) {
SET_STRING_ELT(tally->read_R, row, seq_R);
SET_STRING_ELT(tally->ref_R, row, R_BlankString);
}
else {
SET_STRING_ELT(tally->read_R, row, R_BlankString);
SET_STRING_ELT(tally->ref_R, row, seq_R);
}
|
96cf9938 |
read_cycle_counts(&bytes, row, param, tally->n_cycles,
|
ae7cf39f |
tally->read_pos_mean, tally->read_pos_var,
|
7c263469 |
tally->mdfne, tally->cycle_bins);
|
685d60e3 |
read_nm_counts(&bytes, row, tally->count_high_nm, param.high_nm_score);
read_xs_counts(&bytes, row, tally->count_xs_plus, tally->count_xs_minus);
|
97dc0c28 |
/* no position from which to tabulate the cycles */
|
96cf9938 |
tally->n_cycles_ref[row] = NA_INTEGER;
|
ae7cf39f |
tally->read_pos_mean_ref[row] = NA_REAL;
tally->read_pos_var_ref[row] = NA_REAL;
|
7c263469 |
tally->mdfne_ref[row] = NA_REAL;
|
685d60e3 |
/* overlapping deletion count only relevant for SNVs */
tally->delcount_plus[row] = NA_INTEGER;
tally->delcount_minus[row] = NA_INTEGER;
/* information not available for "ref" allele */
tally->count_high_nm_ref[row] = NA_INTEGER;
if (param.xs) {
tally->count_xs_plus_ref[row] = NA_INTEGER;
tally->count_xs_minus_ref[row] = NA_INTEGER;
|
3f899953 |
}
}
|
685d60e3 |
return indel_count;
|
3f899953 |
}
static int
read_allele_counts(unsigned char **bytes, int row, SEXP read_R,
|
685d60e3 |
int *count_plus, int *count_minus, int *count,
int *delcount_plus, int *delcount_minus,
int strand)
|
3f899953 |
{
int n_alleles = 0;
|
8418591e |
char allele;
|
38518ddb |
char stop = strand == STRAND_NONE ? '\0' : (char) 255;
|
685d60e3 |
|
8418591e |
while((allele = (char)read_char(bytes)) != stop) {
|
685d60e3 |
if (allele == '_') {
*delcount_plus = read_int(bytes);
*delcount_minus = read_int(bytes);
continue;
}
|
38518ddb |
if(strand == STRAND_NONE)
|
c78be696 |
SET_STRING_ELT(read_R, row, mkCharLen(&allele, 1));
else
SET_STRING_ELT(read_R, row, mkCharLen(codon_table[(int)allele], 3));
|
3f899953 |
count_plus[row] = read_int(bytes);
count_minus[row] = read_int(bytes);
count[row] = count_plus[row] + count_minus[row];
row++;
n_alleles++;
}
|
8418591e |
|
3f899953 |
return n_alleles;
}
static int
|
7c263469 |
parse_alleles(unsigned char *bytes, int row, int ref_row,
|
c78be696 |
TallyParam param, TallyTable *tally, int strand)
|
3f899953 |
{
|
5659f096 |
read_total_counts(&bytes, row, tally->raw_count_total, tally->count_total);
|
685d60e3 |
int delcount_plus = 0, delcount_minus = 0;
|
3f899953 |
int n_alleles = read_allele_counts(&bytes, row, tally->read_R,
tally->count_plus, tally->count_minus,
|
c78be696 |
tally->count,
|
685d60e3 |
&delcount_plus, &delcount_minus,
|
c78be696 |
strand);
|
3f899953 |
for (int allele = 0; allele < n_alleles; allele++, row++) {
|
96cf9938 |
tally->n_cycles[row] = 0;
|
7c263469 |
for (int b = 0; b < param.n_cycle_bins; b++) {
|
3f899953 |
tally->cycle_bins[b][row] = 0;
}
|
c78be696 |
tally->strand[row] = strand;
|
685d60e3 |
tally->delcount_plus[row] = delcount_plus;
tally->delcount_minus[row] = delcount_minus;
|
3f899953 |
if (tally->count[row] > 0) {
|
96cf9938 |
read_cycle_counts(&bytes, row, param, tally->n_cycles,
|
ae7cf39f |
tally->read_pos_mean, tally->read_pos_var,
|
7c263469 |
tally->mdfne, tally->cycle_bins);
|
685d60e3 |
read_nm_counts(&bytes, row, tally->count_high_nm, param.high_nm_score);
|
8418591e |
if (param.xs) {
|
685d60e3 |
read_xs_counts(&bytes, row, tally->count_xs_plus,
tally->count_xs_minus);
|
8418591e |
}
|
97dc0c28 |
} else {
|
ae7cf39f |
tally->read_pos_mean[row] = R_NaN;
tally->read_pos_var[row] = NA_REAL;
|
7c263469 |
tally->mdfne[row] = NA_REAL;
|
5659f096 |
tally->count_high_nm[row] = 0;
|
8418591e |
if (param.xs) {
tally->count_xs_plus[row] = 0;
tally->count_xs_minus[row] = 0;
}
|
3f899953 |
}
}
for (int r = ref_row; r < row; r++) {
|
96cf9938 |
tally->n_cycles_ref[r] = tally->n_cycles[ref_row];
|
5659f096 |
tally->raw_count_total[r] = tally->raw_count_total[ref_row];
|
3f899953 |
tally->count_total[r] = tally->count_total[ref_row];
tally->count_plus_ref[r] = tally->count_plus[ref_row];
tally->count_minus_ref[r] = tally->count_minus[ref_row];
tally->count_ref[r] = tally->count_plus_ref[r] + tally->count_minus_ref[r];
|
ae7cf39f |
tally->read_pos_mean_ref[r] = tally->read_pos_mean[ref_row];
tally->read_pos_var_ref[r] = tally->read_pos_var[ref_row];
|
7c263469 |
tally->mdfne_ref[r] = tally->mdfne[ref_row];
|
685d60e3 |
tally->count_high_nm_ref[r] = tally->count_high_nm[ref_row];
|
8418591e |
if (param.xs) {
tally->count_xs_plus_ref[r] = tally->count_xs_plus[ref_row];
tally->count_xs_minus_ref[r] = tally->count_xs_minus[ref_row];
}
|
c78be696 |
SET_STRING_ELT(tally->ref_R, r, STRING_ELT(tally->read_R, ref_row));
|
3f899953 |
}
|
c78be696 |
|
3f899953 |
/* clear the 'alt' columns for the 'ref' row with NAs */
SET_STRING_ELT(tally->read_R, ref_row, NA_STRING);
|
96cf9938 |
tally->n_cycles[ref_row] = NA_INTEGER;
|
3f899953 |
tally->count_plus[ref_row] = NA_INTEGER;
tally->count_minus[ref_row] = NA_INTEGER;
tally->count[ref_row] = NA_INTEGER;
|
ae7cf39f |
tally->read_pos_mean[ref_row] = NA_REAL;
tally->read_pos_var[ref_row] = NA_REAL;
|
7c263469 |
tally->mdfne[ref_row] = NA_REAL;
|
8418591e |
if (param.xs) {
tally->count_xs_plus[ref_row] = NA_INTEGER;
tally->count_xs_minus[ref_row] = NA_INTEGER;
}
|
3f899953 |
return n_alleles;
}
static int parse_indel_count(unsigned char *bytes) {
int count = read_int(&bytes);
return count;
}
static int parse_allele_count(unsigned char *bytes) {
int n_alleles = 1; /* always have a reference */
|
5659f096 |
bytes += sizeof(int) * 5 + 1; /* skip total and reference */
|
3f899953 |
while(bytes[0] != '\0') {
|
685d60e3 |
if (bytes[0] != '_')
n_alleles++;
bytes += sizeof(int) * 2 + 1;
|
c78be696 |
}
return n_alleles;
}
static int parse_codon_count(unsigned char *bytes) {
int n_alleles = 1; /* always have a reference */
bytes += sizeof(int) * 4 + 1; /* skip reference */
while((int) bytes[0] != 255 ){ // && bytes [0] != '\0') {
|
3f899953 |
bytes += sizeof(int) * 2 + 1;
n_alleles++;
}
return n_alleles;
}
|
c78be696 |
|
3f899953 |
static int count_rows_for_interval(IIT_T tally_iit, int index) {
|
dbda16a8 |
int n_rows = 0;
|
3f899953 |
unsigned char *bytes = IIT_data(tally_iit, index);
int width = IIT_length(tally_iit, index);
|
c78be696 |
unsigned char *base = bytes + (NUM_PTRS * width + 1) * sizeof(int);
|
3f899953 |
for (int pos = 0; pos < width; pos++) {
int insertion_offset = read_int(&bytes);
int deletion_offset = read_int(&bytes);
int allele_offset = read_int(&bytes);
|
c78be696 |
int codon_plus_offset = read_int(&bytes);
int codon_minus_offset = read_int(&bytes);
|
3f899953 |
int next_offset = read_int(&bytes);
bytes -= 4; /* rewind from read-ahead */
if (deletion_offset - insertion_offset > 0) {
n_rows += parse_indel_count(base + insertion_offset);
}
if (allele_offset - deletion_offset > 0) {
n_rows += parse_indel_count(base + deletion_offset);
}
|
c78be696 |
if (codon_plus_offset - allele_offset > 0) {
|
3f899953 |
n_rows += parse_allele_count(base + allele_offset);
}
|
c78be696 |
if (codon_minus_offset - codon_plus_offset > 0) {
n_rows += parse_codon_count(base + codon_plus_offset);
}
if (next_offset - codon_minus_offset > 0) {
n_rows += parse_codon_count(base + codon_minus_offset);
}
|
3f899953 |
}
return n_rows;
}
static int parse_interval(IIT_T tally_iit, int index,
|
7c263469 |
TallyParam param, TallyTable *tally, int row)
|
3f899953 |
{
unsigned char *bytes = IIT_data(tally_iit, index);
int width = IIT_length(tally_iit, index);
|
c78be696 |
unsigned char *base = bytes + (NUM_PTRS * width + 1) * sizeof(int);
|
3f899953 |
int start = IIT_interval_low(tally_iit, index);
SEXP divstring_R;
PROTECT(divstring_R = mkChar(IIT_divstring_from_index(tally_iit, index)));
for (int position = 0; position < width; position++) {
int insertion_offset = read_int(&bytes);
int deletion_offset = read_int(&bytes);
int allele_offset = read_int(&bytes);
|
c78be696 |
int codon_plus_offset = read_int(&bytes);
int codon_minus_offset = read_int(&bytes);
|
3f899953 |
int next_offset = read_int(&bytes);
int ref_row = row;
bytes -= 4; /* rewind from read-ahead */
|
c78be696 |
if (codon_plus_offset - allele_offset > 0)
|
3f899953 |
row += parse_alleles(base + allele_offset, row, ref_row,
|
38518ddb |
param, tally, STRAND_NONE);
|
c78be696 |
if (deletion_offset - insertion_offset > 0)
|
3f899953 |
row += parse_indels(base + insertion_offset, row,
|
7c263469 |
param, tally, true);
|
3f899953 |
if (allele_offset - deletion_offset > 0)
row += parse_indels(base + deletion_offset, row,
|
7c263469 |
param, tally, false);
|
c78be696 |
if (codon_minus_offset - codon_plus_offset > 0) {
row += parse_alleles(base + codon_plus_offset, row, row,
|
38518ddb |
param, tally, STRAND_PLUS);
|
c78be696 |
}
if (next_offset - codon_minus_offset > 0) {
row += parse_alleles(base + codon_minus_offset, row, row,
|
38518ddb |
param, tally, STRAND_MINUS);
|
c78be696 |
}
|
3f899953 |
/* fill in position information */
for (int r = ref_row; r < row; r++) {
SET_STRING_ELT(tally->seqnames_R, r, divstring_R);
tally->pos[r] = start + position;
}
}
UNPROTECT(1);
return row;
}
|
7c263469 |
static SEXP parse_all(IIT_T tally_iit, TallyParam param)
|
3f899953 |
{
int n_rows = 0;
/* loop over the IIT, getting the total number of rows
this is (num alts + 1) for every position. */
for (int index = 1; index <= IIT_total_nintervals(tally_iit); index++) {
n_rows += count_rows_for_interval(tally_iit, index);
}
SEXP tally_R;
|
8418591e |
PROTECT(tally_R = R_TallyTable_new(n_rows, param.n_cycle_bins, param.xs));
TallyTable *tally = TallyTable_new(tally_R, param.xs);
|
3f899953 |
int row = 0;
for (int index = 1; index <= IIT_total_nintervals(tally_iit); index++) {
|
7c263469 |
row = parse_interval(tally_iit, index, param, tally, row);
|
3f899953 |
}
UNPROTECT(1);
return tally_R;
}
|
7c263469 |
static SEXP parse_some(IIT_T tally_iit, TallyParam param,
|
3f899953 |
SEXP chr_R, int *start, int *end)
{
int n_rows = 0;
int *nmatches = (int *) R_alloc(sizeof(int), length(chr_R));
int **indexes = (int **) R_alloc(sizeof(int*), length(chr_R));
for (int i = 0; i < length(chr_R); i++) {
indexes[i] = IIT_get(nmatches + i, tally_iit,
(char *) CHAR(STRING_ELT(chr_R, i)),
start[i], end[i], false);
for (int j = 0; j < nmatches[i]; j++) {
n_rows += count_rows_for_interval(tally_iit, indexes[i][j]);
}
}
SEXP tally_R;
|
8418591e |
PROTECT(tally_R = R_TallyTable_new(n_rows, param.n_cycle_bins, param.xs));
TallyTable *tally = TallyTable_new(tally_R, param.xs);
|
3f899953 |
int row = 0;
for (int i = 0; i < length(chr_R); i++) {
for (int j = 0; j < nmatches[i]; j++) {
|
7c263469 |
row = parse_interval(tally_iit, indexes[i][j], param, tally, row);
|
3f899953 |
}
}
UNPROTECT(1);
return tally_R;
}
/* FORMAT
block header:
|
685d60e3 |
[0][ins][del][mm][codon+][codon-]
|
3f899953 |
|
685d60e3 |
mismatches/codons:
[t+][t-][rn][r+][r-]([an][a+][a-])*
mismatches only:
'_'[d+][d-][\0]([c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*)*
|
3f899953 |
insertions:
|
685d60e3 |
[i#]([t+][t-][r+][r-][seq][c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*
|
3f899953 |
deletions:
|
685d60e3 |
[d#]([t+][t-][r+][r-][seq][c#]([cv][cc])*[nm#]([nmv][nmc])*[xs#]([xsv][xsc])*
|
3f899953 |
*/
SEXP R_tally_iit_parse(SEXP tally_iit_R, SEXP cycle_breaks_R,
|
685d60e3 |
SEXP which_R,
SEXP read_length_R, SEXP xs_R, SEXP high_nm_score_R)
|
3f899953 |
{
IIT_T tally_iit = (IIT_T) R_ExternalPtrAddr(tally_iit_R);
SEXP tally_R;
|
7c263469 |
TallyParam param;
param.cycle_breaks =
cycle_breaks_R == R_NilValue ? NULL : INTEGER(cycle_breaks_R);
param.n_cycle_bins =
length(cycle_breaks_R) == 0 ? 0 : length(cycle_breaks_R) - 1;
param.read_length = asInteger(read_length_R);
if (param.read_length != NA_INTEGER) {
param.mdfne_buf = (double *)R_alloc(sizeof(double), param.read_length);
|
849b7451 |
} else {
param.mdfne_buf = NULL;
|
7c263469 |
}
|
8418591e |
param.xs = asLogical(xs_R);
|
685d60e3 |
param.high_nm_score = asInteger(high_nm_score_R);
|
3f899953 |
if (which_R == R_NilValue) {
|
7c263469 |
tally_R = parse_all(tally_iit, param);
|
3f899953 |
} else {
SEXP chr_R = VECTOR_ELT(which_R, 0);
int *start = INTEGER(VECTOR_ELT(which_R, 1));
int *end = INTEGER(VECTOR_ELT(which_R, 2));
|
7c263469 |
tally_R = parse_some(tally_iit, param, chr_R, start, end);
|
3f899953 |
}
return tally_R;
}
|