git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/rtracklayer@44644 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,6 +1,6 @@ |
1 | 1 |
Package: rtracklayer |
2 | 2 |
Title: R interface to genome browsers and their annotation tracks |
3 |
-Version: 1.7.6 |
|
3 |
+Version: 1.7.7 |
|
4 | 4 |
Author: Michael Lawrence, Vince Carey, Robert Gentleman |
5 | 5 |
Depends: R (>= 2.7.0), methods, RCurl |
6 | 6 |
Imports: XML (>= 1.98-0), IRanges (>= 1.3.50), Biostrings (>= 2.11.14), BSgenome (>= 1.13.10) |
... | ... |
@@ -14,5 +14,5 @@ Description: Extensible framework for interacting with multiple genome |
14 | 14 |
viewport. |
15 | 15 |
Maintainer: Michael Lawrence <mflawren@fhcrc.org> |
16 | 16 |
License: Artistic-2.0 |
17 |
-Collate: io.R web.R ranges.R browser.R gff.R ucsc.R bed.R wig.R utils.R |
|
17 |
+Collate: io.R web.R ranges.R browser.R gff.R ucsc.R bed.R wig.R utils.R bigWig.R |
|
18 | 18 |
biocViews: Annotation,Visualization,DataImport |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-#useDynLib(rtracklayer, .registration = TRUE) |
|
1 |
+useDynLib(rtracklayer, .registration = TRUE) |
|
2 | 2 |
|
3 | 3 |
import(RCurl, XML) |
4 | 4 |
|
... | ... |
@@ -16,15 +16,13 @@ importMethodsFrom("IRanges", start, end, "start<-", "end<-", |
16 | 16 |
range, split, lapply, ranges, values, unlist, as.list, |
17 | 17 |
isDisjoint, width, space, as.data.frame, rbind, universe, |
18 | 18 |
"universe<-", score, "score<-", as.vector, |
19 |
- "ranges<-", colnames, "colnames<-") |
|
19 |
+ "ranges<-", colnames, "colnames<-", queryHits, findOverlaps) |
|
20 | 20 |
|
21 | 21 |
exportClasses("BrowserSession", "BrowserView", |
22 | 22 |
"UCSCSession", "UCSCView", |
23 | 23 |
"UCSCData", "TrackLine", "BasicTrackLine", "GraphTrackLine", |
24 | 24 |
"Bed15TrackLine", "UCSCTrackModes", |
25 |
- "RangedSelection" |
|
26 |
- ##"BigWigSelection" |
|
27 |
- ) |
|
25 |
+ "RangedSelection", "BigWigSelection") |
|
28 | 26 |
|
29 | 27 |
exportMethods("activeView", "activeView<-", "blocks", "browseGenome", |
30 | 28 |
"browserSession", "browserSession<-", |
... | ... |
@@ -51,9 +49,8 @@ exportMethods("activeView", "activeView<-", "blocks", "browseGenome", |
51 | 49 |
|
52 | 50 |
export("genomeBrowsers", "start", "end", "strand", "start<-", "end<-", |
53 | 51 |
"ranges", "GenomicRanges", "GenomicData", "GenomicSelection", "IRanges", |
54 |
- "RangedDataList", score, "score<-", "as.data.frame", "ucscGenomes" |
|
55 |
- ##"export.bw", "import.bw" |
|
56 |
- ) |
|
52 |
+ "RangedDataList", score, "score<-", "as.data.frame", "ucscGenomes", |
|
53 |
+ "export.bw", "import.bw") |
|
57 | 54 |
|
58 | 55 |
## the S4-connection compatibility layer |
59 | 56 |
exportClasses("connection", |
60 | 57 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,106 @@ |
1 |
+### UCSC bigWig format |
|
2 |
+ |
|
3 |
+.allowedColNames <- list(bigWig = "score") |
|
4 |
+ |
|
5 |
+.validateColNames <- function(object, format) { |
|
6 |
+ allowedColNames <- .allowedColNames[[format]] |
|
7 |
+ invalidColNames <- setdiff(colnames(object), allowedColNames) |
|
8 |
+ if (length(invalidColNames)) |
|
9 |
+ paste("Column names", |
|
10 |
+ paste("'", invalidColNames, "'", sep = "", collapse=", "), |
|
11 |
+ "are invalid for the", format, "format.") |
|
12 |
+ else NULL |
|
13 |
+} |
|
14 |
+ |
|
15 |
+setClass("BigWigSelection", contains = "RangedSelection") |
|
16 |
+ |
|
17 |
+setValidity("BigWigSelection", |
|
18 |
+ function(object) { |
|
19 |
+ .validateColNames(object, "bigWig") |
|
20 |
+ }) |
|
21 |
+ |
|
22 |
+ |
|
23 |
+setGeneric("export.bw", |
|
24 |
+ function(object, con, |
|
25 |
+ dataFormat = c("auto", "variableStep", "fixedStep", |
|
26 |
+ "bedGraph"), |
|
27 |
+ seqlengths = NULL, compress = TRUE, ...) |
|
28 |
+ standardGeneric("export.bw")) |
|
29 |
+ |
|
30 |
+setMethod("export.bw", "ANY", |
|
31 |
+ function(object, con, |
|
32 |
+ dataFormat = c("auto", "variableStep", "fixedStep", |
|
33 |
+ "bedGraph"), |
|
34 |
+ seqlengths, compress, genome, ...) |
|
35 |
+ { |
|
36 |
+ rd <- as(object, "RangedData") |
|
37 |
+ if (!is.null(genome)) |
|
38 |
+ genome(rd) <- genome |
|
39 |
+ export.bw(rd, con, dataFormat, seqlengths, compress, ...) |
|
40 |
+ }) |
|
41 |
+ |
|
42 |
+setMethod("export.bw", c("RangedData", "character"), |
|
43 |
+ function(object, con, |
|
44 |
+ dataFormat = c("auto", "variableStep", "fixedStep", |
|
45 |
+ "bedGraph"), |
|
46 |
+ seqlengths, compress) |
|
47 |
+ { |
|
48 |
+ score <- score(object) |
|
49 |
+ if (!is.numeric(score) || any(is.na(score))) |
|
50 |
+ stop("The score must be numeric, without any NA's") |
|
51 |
+ if (!IRanges:::isTRUEorFALSE(compress)) |
|
52 |
+ stop("'compress' must be TRUE or FALSE") |
|
53 |
+ if (is.null(seqlengths) && !is.null(genome(object))) |
|
54 |
+ seqlengths <- seqlengths(.genomeForID(genome(object))) |
|
55 |
+ if (is.null(seqlengths)) |
|
56 |
+ stop("Unable to determine seqlengths; either specify ", |
|
57 |
+ "'seqlengths' or specify a genome on 'object' that ", |
|
58 |
+ "is known to BSgenome") |
|
59 |
+ if (!is.numeric(seqlengths) || |
|
60 |
+ !all(names(object) %in% names(seqlengths))) |
|
61 |
+ stop("seqlengths must be numeric and indicate a length for ", |
|
62 |
+ "each sequence in 'object'") |
|
63 |
+ sectionPtr <- NULL # keep adding to the same linked list |
|
64 |
+ .bigWigWriter <- function(chromData, con, dataFormat) { |
|
65 |
+ sectionPtr <<- .Call(BWGSectionList_add, sectionPtr, |
|
66 |
+ names(chromData)[1], |
|
67 |
+ as(ranges(chromData)[[1]], "IRanges"), |
|
68 |
+ as.numeric(score(chromData)), dataFormat) |
|
69 |
+ } |
|
70 |
+ dataFormat <- match.arg(dataFormat) |
|
71 |
+ if (dataFormat == "auto") |
|
72 |
+ format <- chooseGraphType(object) |
|
73 |
+ else format <- dataFormat |
|
74 |
+ on.exit(.Call(BWGSectionList_cleanup, sectionPtr)) |
|
75 |
+ if (format == "bedGraph") |
|
76 |
+ lapply(object, .bigWigWriter, con, dataFormat) |
|
77 |
+ else export.wigLines(object, con, dataFormat, .bigWigWriter) |
|
78 |
+ storage.mode(seqlengths) <- "integer" |
|
79 |
+ .Call(BWGSectionList_write, sectionPtr, seqlengths, |
|
80 |
+ compress, con) |
|
81 |
+ }) |
|
82 |
+ |
|
83 |
+setGeneric("import.bw", |
|
84 |
+ function(con, selection = GenomicSelection(...), ...) |
|
85 |
+ standardGeneric("import.bw")) |
|
86 |
+ |
|
87 |
+ |
|
88 |
+setMethod("import.bw", "character", |
|
89 |
+ function(con, selection = GenomicSelection(...), ...) |
|
90 |
+ { |
|
91 |
+ if (!IRanges:::isSingleString(con)) |
|
92 |
+ stop("'con' must be a single string, specifying a path") |
|
93 |
+ selection <- try(as(selection, "RangedSelection")) |
|
94 |
+ if (is.character(selection)) |
|
95 |
+ stop("'selection' must be coercible to RangedSelection") |
|
96 |
+ normRanges <- as(ranges(selection), "NormalIRangesList") |
|
97 |
+ rd <- .Call(BWGFile_query, con, as.list(normRanges), |
|
98 |
+ colnames(selection)) |
|
99 |
+ ## Unfortunately, the bigWig query API is such that we can |
|
100 |
+ ## end up with multiple hits. |
|
101 |
+ if (any(width(rd) > 2)) { |
|
102 |
+ hits <- queryHits(findOverlaps(ranges(rd), normRanges)) |
|
103 |
+ rd <- rd[!duplicated(hits),] |
|
104 |
+ } |
|
105 |
+ rd |
|
106 |
+ }) |
1 | 4 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,10 @@ |
1 |
+PKG_OBJECTS = \ |
|
2 |
+ IRanges_stubs.o R_init_rtracklayer.o bigWig.o |
|
3 |
+ |
|
4 |
+UCSC_OBJECTS = \ |
|
5 |
+ bPlusTree.o bbiRead.o bbiWrite.o bwgCreate.o bwgQuery.o \ |
|
6 |
+ cirTree.o common.o errCatch.o errabort.o hash.o linefile.o localmem.o \ |
|
7 |
+ sqlNum.o zlibFace.o dystring.o hmmstats.o obscure.o pipeline.o \ |
|
8 |
+ rangeTree.o rbTree.o memalloc.o dlist.o udc.o net.o bits.o _cheapcgi.o \ |
|
9 |
+ internet.o https.o base64.o verbose.o os.o wildcmp.o _portimpl.o |
|
10 |
+ |
0 | 11 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,20 @@ |
1 |
+#include "rtracklayer.h" |
|
2 |
+#include "bigWig.h" |
|
3 |
+ |
|
4 |
+#include <R_ext/Rdynload.h> |
|
5 |
+ |
|
6 |
+#define CALLMETHOD_DEF(fun, numArgs) {#fun, (DL_FUNC) &fun, numArgs} |
|
7 |
+ |
|
8 |
+static const R_CallMethodDef callMethods[] = { |
|
9 |
+/* bigWig.c */ |
|
10 |
+ CALLMETHOD_DEF(BWGSectionList_add, 5), |
|
11 |
+ CALLMETHOD_DEF(BWGSectionList_write, 4), |
|
12 |
+ CALLMETHOD_DEF(BWGSectionList_cleanup, 1), |
|
13 |
+ CALLMETHOD_DEF(BWGFile_query, 3), |
|
14 |
+ {NULL, NULL, 0} |
|
15 |
+}; |
|
16 |
+ |
|
17 |
+void R_init_rtracklayer(DllInfo *info) |
|
18 |
+{ |
|
19 |
+ R_registerRoutines(info, NULL, callMethods, NULL, NULL); |
|
20 |
+} |
0 | 21 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,217 @@ |
1 |
+#include "ucsc/common.h" |
|
2 |
+#include "ucsc/linefile.h" |
|
3 |
+#include "ucsc/localmem.h" |
|
4 |
+#include "ucsc/hash.h" |
|
5 |
+#include "ucsc/bbiFile.h" |
|
6 |
+#include "ucsc/bwgInternal.h" |
|
7 |
+#include "ucsc/_bwgInternal.h" |
|
8 |
+ |
|
9 |
+#include "bigWig.h" |
|
10 |
+ |
|
11 |
+static struct bwgBedGraphItem * |
|
12 |
+createBedGraphItems(int *start, int *width, double *score, int len, |
|
13 |
+ struct lm *lm) |
|
14 |
+{ |
|
15 |
+ struct bwgBedGraphItem *itemList = NULL, *item; |
|
16 |
+ int i; |
|
17 |
+ for (i=0; i<len; ++i) |
|
18 |
+ { |
|
19 |
+ lmAllocVar(lm, item); |
|
20 |
+ item->end = start[i] + width[i] - 1; |
|
21 |
+ item->start = start[i] - 1; |
|
22 |
+ item->val = score[i]; |
|
23 |
+ slAddHead(&itemList, item); |
|
24 |
+ } |
|
25 |
+ slReverse(&itemList); |
|
26 |
+ return itemList; |
|
27 |
+} |
|
28 |
+ |
|
29 |
+static struct bwgVariableStepPacked * |
|
30 |
+createVariableStepItems(int *start, double *score, int len, struct lm *lm) |
|
31 |
+{ |
|
32 |
+ struct bwgVariableStepPacked *packed; |
|
33 |
+ lmAllocArray(lm, packed, len); |
|
34 |
+ int i; |
|
35 |
+ for (i=0; i<len; ++i) |
|
36 |
+ { |
|
37 |
+ packed[i].start = start[i] - 1; |
|
38 |
+ packed[i].val = score[i]; |
|
39 |
+ } |
|
40 |
+ return packed; |
|
41 |
+} |
|
42 |
+ |
|
43 |
+static struct bwgFixedStepPacked * |
|
44 |
+createFixedStepItems(double *score, int len, struct lm *lm) |
|
45 |
+{ |
|
46 |
+ struct bwgFixedStepPacked *packed; |
|
47 |
+ lmAllocArray(lm, packed, len); |
|
48 |
+ int i; |
|
49 |
+ for (i=0; i<len; ++i) |
|
50 |
+ { |
|
51 |
+ packed[i].val = score[i]; |
|
52 |
+ } |
|
53 |
+ return packed; |
|
54 |
+} |
|
55 |
+ |
|
56 |
+static struct bwgSection * |
|
57 |
+createBWGSection(const char *seq, int *start, int *width, double *score, |
|
58 |
+ int len, enum bwgSectionType type, struct lm *lm) |
|
59 |
+{ |
|
60 |
+ struct bwgSection *section; |
|
61 |
+ lmAllocVar(lm, section); |
|
62 |
+ section->chrom = (char *)seq; |
|
63 |
+ section->start = start[0] - 1; |
|
64 |
+ section->end = start[len-1] + width[len-1] - 1; |
|
65 |
+ section->type = type; |
|
66 |
+ section->itemSpan = width[0]; |
|
67 |
+ if (type == bwgTypeFixedStep) { |
|
68 |
+ section->items.fixedStepPacked = createFixedStepItems(score, len, lm); |
|
69 |
+ section->itemStep = len > 1 ? start[1] - start[0] : 0; |
|
70 |
+ } else if (type == bwgTypeVariableStep) { |
|
71 |
+ section->items.variableStepPacked = |
|
72 |
+ createVariableStepItems(start, score, len, lm); |
|
73 |
+ } else section->items.bedGraphList = |
|
74 |
+ createBedGraphItems(start, width, score, len, lm); |
|
75 |
+ section->itemCount = len; |
|
76 |
+ return section; |
|
77 |
+} |
|
78 |
+ |
|
79 |
+static int itemsPerSlot = 512; |
|
80 |
+static int blockSize = 1024; |
|
81 |
+ |
|
82 |
+/* --- .Call ENTRY POINT --- */ |
|
83 |
+ |
|
84 |
+SEXP BWGSectionList_add(SEXP r_sections, SEXP r_seq, SEXP r_ranges, |
|
85 |
+ SEXP r_score, SEXP r_format) |
|
86 |
+{ |
|
87 |
+ struct bwgSection *sections = NULL; |
|
88 |
+ const char *seq = CHAR(asChar(r_seq)); |
|
89 |
+ int *start = INTEGER(get_IRanges_start(r_ranges)); |
|
90 |
+ int *width = INTEGER(get_IRanges_width(r_ranges)); |
|
91 |
+ double *score = REAL(r_score); |
|
92 |
+ const char *format = CHAR(asChar(r_format)); |
|
93 |
+ int num = get_IRanges_length(r_ranges); |
|
94 |
+ int numLeft = num; |
|
95 |
+ SEXP ans; |
|
96 |
+ struct lm *lm; |
|
97 |
+ |
|
98 |
+ enum bwgSectionType type = bwgTypeBedGraph; |
|
99 |
+ if (sameString(format, "fixedStep")) |
|
100 |
+ type = bwgTypeFixedStep; |
|
101 |
+ else if (sameString(format, "variableStep")) |
|
102 |
+ type = bwgTypeVariableStep; |
|
103 |
+ |
|
104 |
+ if (r_sections != R_NilValue) { |
|
105 |
+ sections = R_ExternalPtrAddr(r_sections); |
|
106 |
+ lm = R_ExternalPtrAddr(R_ExternalPtrTag(r_sections)); |
|
107 |
+ } else lm = lmInit(0); |
|
108 |
+ |
|
109 |
+ while(numLeft) { |
|
110 |
+ int numSection = numLeft > itemsPerSlot ? itemsPerSlot : numLeft; |
|
111 |
+ numLeft -= numSection; |
|
112 |
+ slAddHead(§ions, |
|
113 |
+ createBWGSection(seq, start, width, score, numSection, type, lm)); |
|
114 |
+ start += numSection; |
|
115 |
+ width += numSection; |
|
116 |
+ score += numSection; |
|
117 |
+ } |
|
118 |
+ |
|
119 |
+ PROTECT(ans = R_MakeExternalPtr(sections, R_NilValue, R_NilValue)); |
|
120 |
+ R_SetExternalPtrTag(ans, R_MakeExternalPtr(lm, R_NilValue, R_NilValue)); |
|
121 |
+ UNPROTECT(1); |
|
122 |
+ |
|
123 |
+ return ans; |
|
124 |
+} |
|
125 |
+ |
|
126 |
+static struct hash *createIntHash(SEXP v) { |
|
127 |
+ struct hash *hash = hashNew(0); |
|
128 |
+ SEXP names = getAttrib(v, R_NamesSymbol); |
|
129 |
+ for (int i = 0; i < length(v); i++) |
|
130 |
+ hashAddInt(hash, (char *)CHAR(STRING_ELT(names, i)), INTEGER(v)[i]); |
|
131 |
+ return hash; |
|
132 |
+} |
|
133 |
+ |
|
134 |
+/* --- .Call ENTRY POINT --- */ |
|
135 |
+SEXP BWGSectionList_write(SEXP r_sections, SEXP r_seqlengths, SEXP r_compress, |
|
136 |
+ SEXP r_file) |
|
137 |
+{ |
|
138 |
+ struct bwgSection *sections = NULL; |
|
139 |
+ struct hash *lenHash = createIntHash(r_seqlengths); |
|
140 |
+ if (r_sections != R_NilValue) { |
|
141 |
+ sections = R_ExternalPtrAddr(r_sections); |
|
142 |
+ slReverse(§ions); |
|
143 |
+ } |
|
144 |
+ bwgCreate(sections, lenHash, blockSize, itemsPerSlot, asLogical(r_compress), |
|
145 |
+ (char *)CHAR(asChar(r_file))); |
|
146 |
+ freeHash(&lenHash); |
|
147 |
+ return R_NilValue; |
|
148 |
+} |
|
149 |
+ |
|
150 |
+/* --- .Call ENTRY POINT --- */ |
|
151 |
+SEXP BWGSectionList_cleanup(SEXP r_sections) |
|
152 |
+{ |
|
153 |
+ if (r_sections != R_NilValue) { |
|
154 |
+ struct lm *lm = R_ExternalPtrAddr(R_ExternalPtrTag(r_sections)); |
|
155 |
+ lmCleanup(&lm); |
|
156 |
+ } |
|
157 |
+ return R_NilValue; |
|
158 |
+} |
|
159 |
+ |
|
160 |
+/* --- .Call ENTRY POINT --- */ |
|
161 |
+SEXP BWGFile_query(SEXP r_filename, SEXP r_ranges, SEXP r_colnames) { |
|
162 |
+ struct bbiFile * file = bigWigFileOpen((char *)CHAR(asChar(r_filename))); |
|
163 |
+ SEXP chromNames = getAttrib(r_ranges, R_NamesSymbol); |
|
164 |
+ int nchroms = length(r_ranges); |
|
165 |
+ SEXP rangesList, rangesListEls, dataFrameList, dataFrameListEls, ans; |
|
166 |
+ const char *var_names[] = { "score", "" }; |
|
167 |
+ struct lm *lm = lmInit(0); |
|
168 |
+ |
|
169 |
+ struct bbiInterval *hits = NULL; |
|
170 |
+ |
|
171 |
+ PROTECT(rangesListEls = allocVector(VECSXP, nchroms)); |
|
172 |
+ setAttrib(rangesListEls, R_NamesSymbol, chromNames); |
|
173 |
+ PROTECT(dataFrameListEls = allocVector(VECSXP, nchroms)); |
|
174 |
+ setAttrib(dataFrameListEls, R_NamesSymbol, chromNames); |
|
175 |
+ |
|
176 |
+ for (int i = 0; i < length(r_ranges); i++) { |
|
177 |
+ SEXP localRanges = VECTOR_ELT(r_ranges, i); |
|
178 |
+ int nranges = get_IRanges_length(localRanges); |
|
179 |
+ int *start = INTEGER(get_IRanges_start(localRanges)); |
|
180 |
+ int *width = INTEGER(get_IRanges_width(localRanges)); |
|
181 |
+ for (int j = 0; j < nranges; j++) { |
|
182 |
+ struct bbiInterval *queryHits = |
|
183 |
+ bigWigIntervalQuery(file, (char *)CHAR(STRING_ELT(chromNames, i)), |
|
184 |
+ start[j] - 1, start[j] - 1 + width[j], lm); |
|
185 |
+ hits = slCat(queryHits, hits); |
|
186 |
+ } |
|
187 |
+ int nhits = slCount(hits); |
|
188 |
+ SEXP ans_start, ans_width, ans_score, ans_score_l; |
|
189 |
+ PROTECT(ans_start = allocVector(INTSXP, nhits)); |
|
190 |
+ PROTECT(ans_width = allocVector(INTSXP, nhits)); |
|
191 |
+ PROTECT(ans_score_l = mkNamed(VECSXP, var_names)); |
|
192 |
+ ans_score = allocVector(REALSXP, nhits); |
|
193 |
+ SET_VECTOR_ELT(ans_score_l, 0, ans_score); |
|
194 |
+ for (int j = 0; j < nhits; j++, hits = hits->next) { |
|
195 |
+ INTEGER(ans_start)[j] = hits->start + 1; |
|
196 |
+ INTEGER(ans_width)[j] = hits->end - hits->start; |
|
197 |
+ REAL(ans_score)[j] = hits->val; |
|
198 |
+ } |
|
199 |
+ SET_VECTOR_ELT(rangesListEls, i, |
|
200 |
+ new_IRanges("IRanges", ans_start, ans_width, R_NilValue)); |
|
201 |
+ SET_VECTOR_ELT(dataFrameListEls, i, |
|
202 |
+ new_DataFrame("DataFrame", ans_score_l, R_NilValue, |
|
203 |
+ ScalarInteger(nhits))); |
|
204 |
+ UNPROTECT(3); |
|
205 |
+ } |
|
206 |
+ |
|
207 |
+ bbiFileClose(&file); |
|
208 |
+ |
|
209 |
+ PROTECT(dataFrameList = |
|
210 |
+ new_SimpleList("SimpleSplitDataFrameList", dataFrameListEls)); |
|
211 |
+ PROTECT(rangesList = new_SimpleList("SimpleRangesList", rangesListEls)); |
|
212 |
+ ans = new_RangedData("RangedData", rangesList, dataFrameList); |
|
213 |
+ |
|
214 |
+ UNPROTECT(4); |
|
215 |
+ lmCleanup(&lm); |
|
216 |
+ return ans; |
|
217 |
+} |
0 | 218 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,15 @@ |
1 |
+#ifndef BIG_WIG_H |
|
2 |
+#define BIG_WIG_H |
|
3 |
+ |
|
4 |
+#include "rtracklayer.h" |
|
5 |
+ |
|
6 |
+/* The .Call entry points */ |
|
7 |
+ |
|
8 |
+SEXP BWGSectionList_add(SEXP r_sections, SEXP r_seq, SEXP r_ranges, |
|
9 |
+ SEXP r_score, SEXP r_format); |
|
10 |
+SEXP BWGSectionList_write(SEXP r_sections, SEXP r_seqlengths, SEXP r_compress, |
|
11 |
+ SEXP r_file); |
|
12 |
+SEXP BWGSectionList_cleanup(SEXP r_sections); |
|
13 |
+SEXP BWGFile_query(SEXP r_filename, SEXP r_ranges, SEXP r_colnames); |
|
14 |
+ |
|
15 |
+#endif |
0 | 9 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,74 @@ |
1 |
+/* Stuff that should be in ucsc/bwgInternal.h, but wasn't, so |
|
2 |
+ rtracklayer put it here. */ |
|
3 |
+ |
|
4 |
+struct bwgBedGraphItem |
|
5 |
+/* An bedGraph-type item in a bwgSection. */ |
|
6 |
+{ |
|
7 |
+ struct bwgBedGraphItem *next; /* Next in list. */ |
|
8 |
+ bits32 start,end; /* Range of chromosome covered. */ |
|
9 |
+ float val; /* Value. */ |
|
10 |
+}; |
|
11 |
+ |
|
12 |
+struct bwgVariableStepItem |
|
13 |
+/* An variableStep type item in a bwgSection. */ |
|
14 |
+{ |
|
15 |
+ struct bwgVariableStepItem *next; /* Next in list. */ |
|
16 |
+ bits32 start; /* Start position in chromosome. */ |
|
17 |
+ float val; /* Value. */ |
|
18 |
+}; |
|
19 |
+ |
|
20 |
+struct bwgVariableStepPacked |
|
21 |
+/* An variableStep type item in a bwgSection. */ |
|
22 |
+{ |
|
23 |
+ bits32 start; /* Start position in chromosome. */ |
|
24 |
+ float val; /* Value. */ |
|
25 |
+}; |
|
26 |
+ |
|
27 |
+struct bwgFixedStepItem |
|
28 |
+/* An fixedStep type item in a bwgSection. */ |
|
29 |
+{ |
|
30 |
+ struct bwgFixedStepItem *next; /* Next in list. */ |
|
31 |
+ float val; /* Value. */ |
|
32 |
+}; |
|
33 |
+ |
|
34 |
+struct bwgFixedStepPacked |
|
35 |
+/* An fixedStep type item in a bwgSection. */ |
|
36 |
+{ |
|
37 |
+ float val; /* Value. */ |
|
38 |
+}; |
|
39 |
+ |
|
40 |
+union bwgItem |
|
41 |
+/* Union of item pointers for all possible section types. */ |
|
42 |
+{ |
|
43 |
+ struct bwgBedGraphItem *bedGraphList; /* A linked list */ |
|
44 |
+ struct bwgFixedStepPacked *fixedStepPacked; /* An array */ |
|
45 |
+ struct bwgVariableStepPacked *variableStepPacked; /* An array */ |
|
46 |
+ /* No packed format for bedGraph... */ |
|
47 |
+}; |
|
48 |
+ |
|
49 |
+struct bwgSection |
|
50 |
+/* A section of a bigWig file - all on same chrom. This is a somewhat fat data |
|
51 |
+ * structure used by the bigWig creation code. See also bwgSection for the |
|
52 |
+ * structure returned by the bigWig reading code. */ |
|
53 |
+{ |
|
54 |
+ struct bwgSection *next; /* Next in list. */ |
|
55 |
+ char *chrom; /* Chromosome name. */ |
|
56 |
+ bits32 start,end; /* Range of chromosome covered. */ |
|
57 |
+ enum bwgSectionType type; |
|
58 |
+ union bwgItem items; /* List/array of items in this section. */ |
|
59 |
+ bits32 itemStep; /* Step within item if applicable. */ |
|
60 |
+ bits32 itemSpan; /* Item span if applicable. */ |
|
61 |
+ bits16 itemCount; /* Number of items in section. */ |
|
62 |
+ bits32 chromId; /* Unique small integer value for chromosome. */ |
|
63 |
+ bits64 fileOffset; /* Offset of section in file. */ |
|
64 |
+}; |
|
65 |
+ |
|
66 |
+void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, |
|
67 |
+ int blockSize, int itemsPerSlot, boolean doCompress, |
|
68 |
+ char *fileName); |
|
69 |
+ |
|
70 |
+struct bbiFile *bigWigFileOpen(char *fileName); |
|
71 |
+ |
|
72 |
+struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, |
|
73 |
+ bits32 start, bits32 end, |
|
74 |
+ struct lm *lm); |
0 | 75 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,30 @@ |
1 |
+/* rtracklayer took out this little utility from cheapcgi.c */ |
|
2 |
+ |
|
3 |
+#include "common.h" |
|
4 |
+ |
|
5 |
+void cgiDecode(char *in, char *out, int inLength) |
|
6 |
+/* Decode from cgi pluses-for-spaces format to normal. |
|
7 |
+ * Out will be a little shorter than in typically, and |
|
8 |
+ * can be the same buffer. */ |
|
9 |
+{ |
|
10 |
+ char c; |
|
11 |
+ int i; |
|
12 |
+ for (i=0; i<inLength;++i) |
|
13 |
+ { |
|
14 |
+ c = *in++; |
|
15 |
+ if (c == '+') |
|
16 |
+ *out++ = ' '; |
|
17 |
+ else if (c == '%') |
|
18 |
+ { |
|
19 |
+ int code; |
|
20 |
+ if (sscanf(in, "%2x", &code) != 1) |
|
21 |
+ code = '?'; |
|
22 |
+ in += 2; |
|
23 |
+ i += 2; |
|
24 |
+ *out++ = code; |
|
25 |
+ } |
|
26 |
+ else |
|
27 |
+ *out++ = c; |
|
28 |
+ } |
|
29 |
+ *out++ = 0; |
|
30 |
+} |
0 | 31 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,51 @@ |
1 |
+/* rtracklayer took this utility out of portimpl.c */ |
|
2 |
+ |
|
3 |
+#include "common.h" |
|
4 |
+#include "portable.h" |
|
5 |
+ |
|
6 |
+void makeDirsOnPath(char *pathName) |
|
7 |
+/* Create directory specified by pathName. If pathName contains |
|
8 |
+ * slashes, create directory at each level of path if it doesn't |
|
9 |
+ * already exist. Abort with error message if there's a problem. |
|
10 |
+ * (It's not considered a problem for the directory to already |
|
11 |
+ * exist. ) */ |
|
12 |
+{ |
|
13 |
+/* Save a copy of current directory. */ |
|
14 |
+char *curDir = cloneString(getCurrentDir()); |
|
15 |
+ |
|
16 |
+/* Return current directory. Abort if it fails. */ |
|
17 |
+/* Make local copy of pathName. */ |
|
18 |
+int len = strlen(pathName); |
|
19 |
+char pathCopy[len+1]; |
|
20 |
+strcpy(pathCopy, pathName); |
|
21 |
+ |
|
22 |
+/* Start at root if it's an absolute path name. */ |
|
23 |
+char *s = pathCopy, *e; |
|
24 |
+if (pathCopy[0] == '/') |
|
25 |
+ { |
|
26 |
+setCurrentDir("/"); |
|
27 |
+} |
|
28 |
+ |
|
29 |
+/* Step through it one slash at a time - changing directory if possible |
|
30 |
+ * else making directory if possible, else dying. */ |
|
31 |
+for (; !isEmpty(s); s = e) |
|
32 |
+ { |
|
33 |
+/* Find end of this section and terminate string there. */ |
|
34 |
+e = strchr(s, '/'); |
|
35 |
+if (e != NULL) |
|
36 |
+ *e++ = 0; |
|
37 |
+ |
|
38 |
+/* Cd there. If that fails mkdir there and then cd there. */ |
|
39 |
+if (!maybeSetCurrentDir(s)) |
|
40 |
+ { |
|
41 |
+if (!makeDir(s)) |
|
42 |
+ { |
|
43 |
+break; |
|
44 |
+} |
|
45 |
+setCurrentDir(s); |
|
46 |
+} |
|
47 |
+} |
|
48 |
+setCurrentDir(curDir); |
|
49 |
+freeMem(curDir); |
|
50 |
+} |
0 | 51 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,440 @@ |
1 |
+/* bptFile - B+ Trees. These are a method of indexing data similar to binary trees, but |
|
2 |
+ * with many children rather than just two at each node. They work well when stored on disk, |
|
3 |
+ * since typically only two or three disk accesses are needed to locate any particular |
|
4 |
+ * piece of data. This implementation is *just* for disk based storage. For memory |
|
5 |
+ * use the rbTree instead. Currently the implementation is just useful for data warehouse |
|
6 |
+ * type applications. That is it implements a function to create a b+ tree from bulk data |
|
7 |
+ * (bptFileCreate) and a function to lookup a value given a key (bptFileFind) but not functions |
|
8 |
+ * to add or delete individual items. |
|
9 |
+ * |
|
10 |
+ * The layout of the file on disk is: |
|
11 |
+ * header |
|
12 |
+ * root node |
|
13 |
+ * (other nodes) |
|
14 |
+ * In general when the tree is first built the higher level nodes are stored before the |
|
15 |
+ * lower level nodes. It is possible if a b+ tree is dynamically updated for this to |
|
16 |
+ * no longer be strictly true, but actually currently the b+ tree code here doesn't implement |
|
17 |
+ * dynamic updates - it just creates a b+ tree from a sorted list. |
|
18 |
+ * |
|
19 |
+ * Each node can be one of two types - index or leaf. The index nodes contain pointers |
|
20 |
+ * to child nodes. The leaf nodes contain the actual data. |
|
21 |
+ * |
|
22 |
+ * The layout of the file header is: |
|
23 |
+ * <magic number> 4 bytes - The value bptSig (0x78CA8C91) |
|
24 |
+ * <block size> 4 bytes - Number of children per block (not byte size of block) |
|
25 |
+ * <key size> 4 bytes - Number of significant bytes in key |
|
26 |
+ * <val size> 4 bytes - Number of bytes in value |
|
27 |
+ * <item count> 8 bytes - Number of items in index |
|
28 |
+ * <reserved2> 4 bytes - Always 0 for now |
|
29 |
+ * <reserved3> 4 bytes - Always 0 for now |
|
30 |
+ * The magic number may be byte-swapped, in which case all numbers in the file |
|
31 |
+ * need to be byte-swapped. |
|
32 |
+ * |
|
33 |
+ * The nodes start with a header: |
|
34 |
+ * <is leaf> 1 byte - 1 for leaf nodes, 0 for index nodes. |
|
35 |
+ * <reserved> 1 byte - Always 0 for now. |
|
36 |
+ * <count> 2 bytes - The number of children/items in node |
|
37 |
+ * This is followed by count items. For the index nodes the items are |
|
38 |
+ * <key> key size bytes - always written most significant byte first |
|
39 |
+ * <offset> 8 bytes - Offset of child node in index file. |
|
40 |
+ * For leaf nodes the items are |
|
41 |
+ * <key> key size bytes - always written most significant byte first |
|
42 |
+ * <val> val sized bytes - the value associated with the key. |
|
43 |
+ * Note in general the leaf nodes may not be the same size as the index nodes, though in |
|
44 |
+ * the important case where the values are file offsets they will be. |
|
45 |
+ */ |
|
46 |
+ |
|
47 |
+#include "common.h" |
|
48 |
+#include "sig.h" |
|
49 |
+#include "udc.h" |
|
50 |
+#include "bPlusTree.h" |
|
51 |
+ |
|
52 |
+/* This section of code deals with locating a value in a b+ tree. */ |
|
53 |
+ |
|
54 |
+struct bptFile *bptFileAttach(char *fileName, struct udcFile *udc) |
|
55 |
+/* Open up index file on previously open file, with header at current file position. */ |
|
56 |
+{ |
|
57 |
+/* Open file and allocate structure to hold info from header etc. */ |
|
58 |
+struct bptFile *bpt = needMem(sizeof(*bpt)); |
|
59 |
+bpt->fileName = fileName; |
|
60 |
+bpt->udc = udc; |
|
61 |
+ |
|
62 |
+/* Read magic number at head of file and use it to see if we are proper file type, and |
|
63 |
+ * see if we are byte-swapped. */ |
|
64 |
+bits32 magic; |
|
65 |
+boolean isSwapped = FALSE; |
|
66 |
+udcMustReadOne(udc, magic); |
|
67 |
+if (magic != bptSig) |
|
68 |
+ { |
|
69 |
+ magic = byteSwap32(magic); |
|
70 |
+ isSwapped = bpt->isSwapped = TRUE; |
|
71 |
+ if (magic != bptSig) |
|
72 |
+ errAbort("%s is not a bpt b-plus tree index file", fileName); |
|
73 |
+ } |
|
74 |
+ |
|
75 |
+/* Read rest of defined bits of header, byte swapping as needed. */ |
|
76 |
+bpt->blockSize = udcReadBits32(udc, isSwapped); |
|
77 |
+bpt->keySize = udcReadBits32(udc, isSwapped); |
|
78 |
+bpt->valSize = udcReadBits32(udc, isSwapped); |
|
79 |
+bpt->itemCount = udcReadBits64(udc, isSwapped); |
|
80 |
+ |
|
81 |
+/* Skip over reserved bits of header. */ |
|
82 |
+bits32 reserved32; |
|
83 |
+udcMustReadOne(udc, reserved32); |
|
84 |
+udcMustReadOne(udc, reserved32); |
|
85 |
+ |
|
86 |
+/* Save position of root block of b+ tree. */ |
|
87 |
+bpt->rootOffset = udcTell(udc); |
|
88 |
+ |
|
89 |
+return bpt; |
|
90 |
+} |
|
91 |
+ |
|
92 |
+void bptFileDetach(struct bptFile **pBpt) |
|
93 |
+/* Detach and free up cirTree file opened with cirTreeFileAttach. */ |
|
94 |
+{ |
|
95 |
+freez(pBpt); |
|
96 |
+} |
|
97 |
+ |
|
98 |
+struct bptFile *bptFileOpen(char *fileName) |
|
99 |
+/* Open up index file - reading header and verifying things. */ |
|
100 |
+{ |
|
101 |
+return bptFileAttach(cloneString(fileName), udcFileOpen(fileName, udcDefaultDir())); |
|
102 |
+} |
|
103 |
+ |
|
104 |
+void bptFileClose(struct bptFile **pBpt) |
|
105 |
+/* Close down and deallocate index file. */ |
|
106 |
+{ |
|
107 |
+struct bptFile *bpt = *pBpt; |
|
108 |
+if (bpt != NULL) |
|
109 |
+ { |
|
110 |
+ udcFileClose(&bpt->udc); |
|
111 |
+ freeMem(bpt->fileName); |
|
112 |
+ bptFileDetach(pBpt); |
|
113 |
+ } |
|
114 |
+} |
|
115 |
+ |
|
116 |
+static boolean rFind(struct bptFile *bpt, bits64 blockStart, void *key, void *val) |
|
117 |
+/* Find value corresponding to key. If found copy value to memory pointed to by val and return |
|
118 |
+ * true. Otherwise return false. */ |
|
119 |
+{ |
|
120 |
+/* Seek to start of block. */ |
|
121 |
+udcSeek(bpt->udc, blockStart); |
|
122 |
+ |
|
123 |
+/* Read block header. */ |
|
124 |
+UBYTE isLeaf; |
|
125 |
+UBYTE reserved; |
|
126 |
+bits16 i, childCount; |
|
127 |
+udcMustReadOne(bpt->udc, isLeaf); |
|
128 |
+udcMustReadOne(bpt->udc, reserved); |
|
129 |
+boolean isSwapped = bpt->isSwapped; |
|
130 |
+childCount = udcReadBits16(bpt->udc, isSwapped); |
|
131 |
+ |
|
132 |
+UBYTE keyBuf[bpt->keySize]; /* Place to put a key, buffered on stack. */ |
|
133 |
+ |
|
134 |
+if (isLeaf) |
|
135 |
+ { |
|
136 |
+ for (i=0; i<childCount; ++i) |
|
137 |
+ { |
|
138 |
+ udcMustRead(bpt->udc, keyBuf, bpt->keySize); |
|
139 |
+ udcMustRead(bpt->udc, val, bpt->valSize); |
|
140 |
+ if (memcmp(key, keyBuf, bpt->keySize) == 0) |
|
141 |
+ return TRUE; |
|
142 |
+ } |
|
143 |
+ return FALSE; |
|
144 |
+ } |
|
145 |
+else |
|
146 |
+ { |
|
147 |
+ /* Read and discard first key. */ |
|
148 |
+ udcMustRead(bpt->udc, keyBuf, bpt->keySize); |
|
149 |
+ |
|
150 |
+ /* Scan info for first file offset. */ |
|
151 |
+ bits64 fileOffset = udcReadBits64(bpt->udc, isSwapped); |
|
152 |
+ |
|
153 |
+ /* Loop through remainder. */ |
|
154 |
+ for (i=1; i<childCount; ++i) |
|
155 |
+ { |
|
156 |
+ udcMustRead(bpt->udc, keyBuf, bpt->keySize); |
|
157 |
+ if (memcmp(key, keyBuf, bpt->keySize) < 0) |
|
158 |
+ break; |
|
159 |
+ fileOffset = udcReadBits64(bpt->udc, isSwapped); |
|
160 |
+ } |
|
161 |
+ return rFind(bpt, fileOffset, key, val); |
|
162 |
+ } |
|
163 |
+} |
|
164 |
+ |
|
165 |
+void rTraverse(struct bptFile *bpt, bits64 blockStart, void *context, |
|
166 |
+ void (*callback)(void *context, void *key, int keySize, void *val, int valSize) ) |
|
167 |
+/* Recursively go across tree, calling callback at leaves. */ |
|
168 |
+{ |
|
169 |
+/* Seek to start of block. */ |
|
170 |
+udcSeek(bpt->udc, blockStart); |
|
171 |
+ |
|
172 |
+/* Read block header. */ |
|
173 |
+UBYTE isLeaf; |
|
174 |
+UBYTE reserved; |
|
175 |
+bits16 i, childCount; |
|
176 |
+udcMustReadOne(bpt->udc, isLeaf); |
|
177 |
+udcMustReadOne(bpt->udc, reserved); |
|
178 |
+boolean isSwapped = bpt->isSwapped; |
|
179 |
+childCount = udcReadBits16(bpt->udc, isSwapped); |
|
180 |
+ |
|
181 |
+char keyBuf[bpt->keySize], valBuf[bpt->valSize]; |
|
182 |
+if (isLeaf) |
|
183 |
+ { |
|
184 |
+ for (i=0; i<childCount; ++i) |
|
185 |
+ { |
|
186 |
+ udcMustRead(bpt->udc, keyBuf, bpt->keySize); |
|
187 |
+ udcMustRead(bpt->udc, valBuf, bpt->valSize); |
|
188 |
+ callback(context, keyBuf, bpt->keySize, valBuf, bpt->valSize); |
|
189 |
+ } |
|
190 |
+ } |
|
191 |
+else |
|
192 |
+ { |
|
193 |
+ bits64 fileOffsets[childCount]; |
|
194 |
+ /* Loop through to get file offsets of children. */ |
|
195 |
+ for (i=0; i<childCount; ++i) |
|
196 |
+ { |
|
197 |
+ udcMustRead(bpt->udc, keyBuf, bpt->keySize); |
|
198 |
+ fileOffsets[i] = udcReadBits64(bpt->udc, isSwapped); |
|
199 |
+ } |
|
200 |
+ /* Loop through recursing on child offsets. */ |
|
201 |
+ for (i=0; i<childCount; ++i) |
|
202 |
+ rTraverse(bpt, fileOffsets[i], context, callback); |
|
203 |
+ } |
|
204 |
+} |
|
205 |
+ |
|
206 |
+boolean bptFileFind(struct bptFile *bpt, void *key, int keySize, void *val, int valSize) |
|
207 |
+/* Find value associated with key. Return TRUE if it's found. |
|
208 |
+* Parameters: |
|
209 |
+* bpt - file handle returned by bptFileOpen |
|
210 |
+* key - pointer to key string, which needs to be bpt->keySize long |
|
211 |
+* val - pointer to where to put retrieved value |
|
212 |
+*/ |
|
213 |
+{ |
|
214 |
+/* Check key size vs. file key size, and act appropriately. If need be copy key to a local |
|
215 |
+ * buffer and zero-extend it. */ |
|
216 |
+if (keySize > bpt->keySize) |
|
217 |
+ return FALSE; |
|
218 |
+char keyBuf[keySize]; |
|
219 |
+if (keySize != bpt->keySize) |
|
220 |
+ { |
|
221 |
+ memcpy(keyBuf, key, keySize); |
|
222 |
+ memset(keyBuf+keySize, 0, bpt->keySize - keySize); |
|
223 |
+ key = keyBuf; |
|
224 |
+ } |
|
225 |
+ |
|
226 |
+/* Make sure the valSize matches what's in file. */ |
|
227 |
+if (valSize != bpt->valSize) |
|
228 |
+ errAbort("Value size mismatch between bptFileFind (valSize=%d) and %s (valSize=%d)", |
|
229 |
+ valSize, bpt->fileName, bpt->valSize); |
|
230 |
+ |
|
231 |
+/* Call recursive finder. */ |
|
232 |
+return rFind(bpt, bpt->rootOffset, key, val); |
|
233 |
+} |
|
234 |
+ |
|
235 |
+void bptFileTraverse(struct bptFile *bpt, void *context, |
|
236 |
+ void (*callback)(void *context, void *key, int keySize, void *val, int valSize) ) |
|
237 |
+/* Traverse bPlusTree on file, calling supplied callback function at each |
|
238 |
+ * leaf item. */ |
|
239 |
+{ |
|
240 |
+return rTraverse(bpt, bpt->rootOffset, context, callback); |
|
241 |
+} |
|
242 |
+ |
|
243 |
+ |
|
244 |
+/* This section of code deals with making balanced b+ trees given a sorted array as input. |
|
245 |
+ * The difficult part is mostly just calculating the offsets of various things. As an example |
|
246 |
+ * if you had the sorted array: |
|
247 |
+ * 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
|
248 |
+ * and wanted to create a tree of block size 4, the resulting tree would have three levels |
|
249 |
+ * as so: |
|
250 |
+ * 01 17 |
|
251 |
+ * 01 05 09 13 17 21 25 |
|
252 |
+ * 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
|
253 |
+ */ |
|
254 |
+ |
|
255 |
+static int xToY(int x, unsigned y) |
|
256 |
+/* Return x to the Y power, with y usually small. */ |
|
257 |
+{ |
|
258 |
+int i, val = 1; |
|
259 |
+for (i=0; i<y; ++i) |
|
260 |
+ val *= x; |
|
261 |
+return val; |
|
262 |
+} |
|
263 |
+ |
|
264 |
+static int bptCountLevels(int maxBlockSize, int itemCount) |
|
265 |
+/* Count up number of levels needed in tree of given maximum block size. */ |
|
266 |
+{ |
|
267 |
+int levels = 1; |
|
268 |
+while (itemCount > maxBlockSize) |
|
269 |
+ { |
|
270 |
+ itemCount = (itemCount + maxBlockSize - 1) / maxBlockSize; |
|
271 |
+ levels += 1; |
|
272 |
+ } |
|
273 |
+return levels; |
|
274 |
+} |
|
275 |
+ |
|
276 |
+ |
|
277 |
+static bits32 writeIndexLevel(bits16 blockSize, |
|
278 |
+ void *itemArray, int itemSize, int itemCount, |
|
279 |
+ bits32 indexOffset, int level, |
|
280 |
+ void (*fetchKey)(const void *va, char *keyBuf), bits32 keySize, bits32 valSize, |
|
281 |
+ FILE *f) |
|
282 |
+/* Write out a non-leaf level. */ |
|
283 |
+{ |
|
284 |
+char *items = itemArray; |
|
285 |
+ |
|
286 |
+/* Calculate number of nodes to write at this level. */ |
|
287 |
+int slotSizePer = xToY(blockSize, level); // Number of items per slot in node |
|
288 |
+int nodeSizePer = slotSizePer * blockSize; // Number of items per node |
|
289 |
+int nodeCount = (itemCount + nodeSizePer - 1)/nodeSizePer; |
|
290 |
+ |
|
291 |
+/* Calculate sizes and offsets. */ |
|
292 |
+int bytesInIndexBlock = (bptBlockHeaderSize + blockSize * (keySize+sizeof(bits64))); |
|
293 |
+int bytesInLeafBlock = (bptBlockHeaderSize + blockSize * (keySize+valSize)); |
|
294 |
+bits64 bytesInNextLevelBlock = (level == 1 ? bytesInLeafBlock : bytesInIndexBlock); |
|
295 |
+bits64 levelSize = nodeCount * bytesInIndexBlock; |
|
296 |
+bits64 endLevel = indexOffset + levelSize; |
|
297 |
+bits64 nextChild = endLevel; |
|
298 |
+ |
|
299 |
+UBYTE isLeaf = FALSE; |
|
300 |
+UBYTE reserved = 0; |
|
301 |
+ |
|
302 |
+int i,j; |
|
303 |
+char keyBuf[keySize+1]; |
|
304 |
+keyBuf[keySize] = 0; |
|
305 |
+for (i=0; i<itemCount; i += nodeSizePer) |
|
306 |
+ { |
|
307 |
+ /* Calculate size of this block */ |
|
308 |
+ bits16 countOne = (itemCount - i + slotSizePer - 1)/slotSizePer; |
|
309 |
+ if (countOne > blockSize) |
|
310 |
+ countOne = blockSize; |
|
311 |
+ |
|
312 |
+ /* Write block header. */ |
|
313 |
+ writeOne(f, isLeaf); |
|
314 |
+ writeOne(f, reserved); |
|
315 |
+ writeOne(f, countOne); |
|
316 |
+ |
|
317 |
+ /* Write out the slots that are used one by one, and do sanity check. */ |
|
318 |
+ int slotsUsed = 0; |
|
319 |
+ int endIx = i + nodeSizePer; |
|
320 |
+ if (endIx > itemCount) |
|
321 |
+ endIx = itemCount; |
|
322 |
+ for (j=i; j<endIx; j += slotSizePer) |
|
323 |
+ { |
|
324 |
+ void *item = items + j*itemSize; |
|
325 |
+ memset(keyBuf, 0, keySize); |
|
326 |
+ (*fetchKey)(item, keyBuf); |
|
327 |
+ mustWrite(f, keyBuf, keySize); |
|
328 |
+ writeOne(f, nextChild); |
|
329 |
+ nextChild += bytesInNextLevelBlock; |
|
330 |
+ ++slotsUsed; |
|
331 |
+ } |
|
332 |
+ assert(slotsUsed == countOne); |
|
333 |
+ |
|
334 |
+ /* Write out empty slots as all zero. */ |
|
335 |
+ int slotSize = keySize + sizeof(bits64); |
|
336 |
+ for (j=countOne; j<blockSize; ++j) |
|
337 |
+ repeatCharOut(f, 0, slotSize); |
|
338 |
+ } |
|
339 |
+return endLevel; |
|
340 |
+} |
|
341 |
+ |
|
342 |
+static void writeLeafLevel(bits16 blockSize, void *itemArray, int itemSize, int itemCount, |
|
343 |
+ void (*fetchKey)(const void *va, char *keyBuf), bits32 keySize, |
|
344 |
+ void* (*fetchVal)(const void *va), bits32 valSize, |
|
345 |
+ FILE *f) |
|
346 |
+/* Write out leaf level blocks. */ |
|
347 |
+{ |
|
348 |
+char *items = itemArray; |
|
349 |
+int i,j; |
|
350 |
+UBYTE isLeaf = TRUE; |
|
351 |
+UBYTE reserved = 0; |
|
352 |
+bits16 countOne; |
|
353 |
+int countLeft = itemCount; |
|
354 |
+char keyBuf[keySize+1]; |
|
355 |
+keyBuf[keySize] = 0; |
|
356 |
+for (i=0; i<itemCount; i += countOne) |
|
357 |
+ { |
|
358 |
+ /* Write block header */ |
|
359 |
+ if (countLeft > blockSize) |
|
360 |
+ countOne = blockSize; |
|
361 |
+ else |
|
362 |
+ countOne = countLeft; |
|
363 |
+ writeOne(f, isLeaf); |
|
364 |
+ writeOne(f, reserved); |
|
365 |
+ writeOne(f, countOne); |
|
366 |
+ |
|
367 |
+ /* Write out position in genome and in file for each item. */ |
|
368 |
+ for (j=0; j<countOne; ++j) |
|
369 |
+ { |
|
370 |
+ assert(i+j < itemCount); |
|
371 |
+ void *item = items + (i+j)*itemSize; |
|
372 |
+ memset(keyBuf, 0, keySize); |
|
373 |
+ (*fetchKey)(item, keyBuf); |
|
374 |
+ mustWrite(f, keyBuf, keySize); |
|
375 |
+ mustWrite(f, (*fetchVal)(item), valSize); |
|
376 |
+ } |
|
377 |
+ |
|
378 |
+ /* Pad out any unused bits of last block with zeroes. */ |
|
379 |
+ int slotSize = keySize + valSize; |
|
380 |
+ for (j=countOne; j<blockSize; ++j) |
|
381 |
+ repeatCharOut(f, 0, slotSize); |
|
382 |
+ |
|
383 |
+ countLeft -= countOne; |
|
384 |
+ } |
|
385 |
+} |
|
386 |
+ |
|
387 |
+void bptFileBulkIndexToOpenFile(void *itemArray, int itemSize, bits64 itemCount, bits32 blockSize, |
|
388 |
+ void (*fetchKey)(const void *va, char *keyBuf), bits32 keySize, |
|
389 |
+ void* (*fetchVal)(const void *va), bits32 valSize, FILE *f) |
|
390 |
+/* Create a b+ tree index from a sorted array, writing output starting at current position |
|
391 |
+ * of an already open file. See bptFileCreate for explanation of parameters. */ |
|
392 |
+{ |
|
393 |
+bits32 magic = bptSig; |
|
394 |
+bits32 reserved = 0; |
|
395 |
+writeOne(f, magic); |
|
396 |
+writeOne(f, blockSize); |
|
397 |
+writeOne(f, keySize); |
|
398 |
+writeOne(f, valSize); |
|
399 |
+writeOne(f, itemCount); |
|
400 |
+writeOne(f, reserved); |
|
401 |
+writeOne(f, reserved); |
|
402 |
+bits64 indexOffset = ftell(f); |
|
403 |
+ |
|
404 |
+/* Write non-leaf nodes. */ |
|
405 |
+int levels = bptCountLevels(blockSize, itemCount); |
|
406 |
+int i; |
|
407 |
+for (i=levels-1; i > 0; --i) |
|
408 |
+ { |
|
409 |
+ bits32 endLevelOffset = writeIndexLevel(blockSize, itemArray, itemSize, itemCount, indexOffset, |
|
410 |
+ i, fetchKey, keySize, valSize, f); |
|
411 |
+ indexOffset = ftell(f); |
|
412 |
+ if (endLevelOffset != indexOffset) |
|
413 |
+ internalErr(); |
|
414 |
+ } |
|
415 |
+ |
|
416 |
+/* Write leaf nodes */ |
|
417 |
+writeLeafLevel(blockSize, itemArray, itemSize, itemCount, |
|
418 |
+ fetchKey, keySize, fetchVal, valSize, f); |
|
419 |
+} |
|
420 |
+ |
|
421 |
+void bptFileCreate( |
|
422 |
+ void *itemArray, /* Sorted array of things to index. */ |
|
423 |
+ int itemSize, /* Size of each element in array. */ |
|
424 |
+ bits64 itemCount, /* Number of elements in array. */ |
|
425 |
+ bits32 blockSize, /* B+ tree block size - # of children for each node. */ |
|
426 |
+ void (*fetchKey)(const void *va, char *keyBuf), /* Given item, copy key to keyBuf */ |
|
427 |
+ bits32 keySize, /* Size of key */ |
|
428 |
+ void* (*fetchVal)(const void *va), /* Given item, return pointer to value */ |
|
429 |
+ bits32 valSize, /* Size of value */ |
|
430 |
+ char *fileName) /* Name of output file. */ |
|
431 |
+/* Create a b+ tree index file from a sorted array. */ |
|
432 |
+ |
|
433 |
+{ |
|
434 |
+/* Open file and write header. */ |
|
435 |
+FILE *f = mustOpen(fileName, "wb"); |
|
436 |
+bptFileBulkIndexToOpenFile(itemArray, itemSize, itemCount, blockSize, fetchKey, keySize, |
|
437 |
+ fetchVal, valSize, f); |
|
438 |
+carefulClose(&f); |
|
439 |
+} |
|
440 |
+ |
0 | 441 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,114 @@ |
1 |
+/* bptFile - B+ Trees. These are a method of indexing data similar to binary trees, but |
|
2 |
+ * with many children rather than just two at each node. They work well when stored on disk, |
|
3 |
+ * since typically only two or three disk accesses are needed to locate any particular |
|
4 |
+ * piece of data. This implementation is *just* for disk based storage. For memory |
|
5 |
+ * use the rbTree instead. Currently the implementation is just useful for data warehouse |
|
6 |
+ * type applications. That is it implements a function to create a b+ tree from bulk data |
|
7 |
+ * (bptFileCreate) and a function to lookup a value given a key (bptFileFind) but not functions |
|
8 |
+ * to add or delete individual items. |
|
9 |
+ * |
|
10 |
+ * |
|
11 |
+ * The layout of the file on disk is: |
|
12 |
+ * header |
|
13 |
+ * root node |
|
14 |
+ * (other nodes) |
|
15 |
+ * In general when the tree is first built the higher level nodes are stored before the |
|
16 |
+ * lower level nodes. It is possible if a b+ tree is dynamically updated for this to |
|
17 |
+ * no longer be strictly true, but actually currently the b+ tree code here doesn't implement |
|
18 |
+ * dynamic updates - it just creates a b+ tree from a sorted list. |
|
19 |
+ * |
|
20 |
+ * Each node can be one of two types - index or leaf. The index nodes contain pointers |
|
21 |
+ * to child nodes. The leaf nodes contain the actual data. |
|
22 |
+ * |
|
23 |
+ * The layout of the file header is: |
|
24 |
+ * <magic number> 4 bytes - The value bptSig (0x78CA8C91) |
|
25 |
+ * <block size> 4 bytes - Number of children per block (not byte size of block) |
|
26 |
+ * <key size> 4 bytes - Number of significant bytes in key |
|
27 |
+ * <val size> 4 bytes - Number of bytes in value |
|
28 |
+ * <item count> 8 bytes - Number of items in index |
|
29 |
+ * <reserved2> 4 bytes - Always 0 for now |
|
30 |
+ * <reserved3> 4 bytes - Always 0 for now |
|
31 |
+ * The magic number may be byte-swapped, in which case all numbers in the file |
|
32 |
+ * need to be byte-swapped. |
|
33 |
+ * |
|
34 |
+ * The nodes start with a header: |
|
35 |
+ * <is leaf> 1 byte - 1 for leaf nodes, 0 for index nodes. |
|
36 |
+ * <reserved> 1 byte - Always 0 for now. |
|
37 |
+ * <count> 2 bytes - The number of children/items in node |
|
38 |
+ * This is followed by count items. For the index nodes the items are |
|
39 |
+ * <key> key size bytes - always written most significant byte first |
|
40 |
+ * <offset> 8 bytes - Offset of child node in index file. |
|
41 |
+ * For leaf nodes the items are |
|
42 |
+ * <key> key size bytes - always written most significant byte first |
|
43 |
+ * <val> val sized bytes - the value associated with the key. |
|
44 |
+ * Note in general the leaf nodes may not be the same size as the index nodes, though in |
|
45 |
+ * the important case where the values are file offsets they will be. |
|
46 |
+ */ |
|
47 |
+ |
|
48 |
+#ifndef BPLUSTREE_H |
|
49 |
+#define BPLUSTREE_H |
|
50 |
+ |
|
51 |
+struct bptFile |
|
52 |
+/* B+ tree index file handle. */ |
|
53 |
+ { |
|
54 |
+ struct bptFile *next; /* Next in list of index files if any. */ |
|
55 |
+ char *fileName; /* Name of file - for error reporting. */ |
|
56 |
+ struct udcFile *udc; /* Open file pointer. */ |
|
57 |
+ bits32 blockSize; /* Size of block. */ |
|
58 |
+ bits32 keySize; /* Size of keys. */ |
|
59 |
+ bits32 valSize; /* Size of values. */ |
|
60 |
+ bits64 itemCount; /* Number of items indexed. */ |
|
61 |
+ boolean isSwapped; /* If TRUE need to byte swap everything. */ |
|
62 |
+ bits64 rootOffset; /* Offset of root block. */ |
|
63 |
+ }; |
|
64 |
+ |
|
65 |
+struct bptFile *bptFileOpen(char *fileName); |
|
66 |
+/* Open up index file - reading header and verifying things. */ |
|
67 |
+ |
|
68 |
+void bptFileClose(struct bptFile **pBpt); |
|
69 |
+/* Close down and deallocate index file. */ |
|
70 |
+ |
|
71 |
+struct bptFile *bptFileAttach(char *fileName, struct udcFile *udc); |
|
72 |
+/* Open up index file on previously open file, with header at current file position. */ |
|
73 |
+ |
|
74 |
+void bptFileDetach(struct bptFile **pBpt); |
|
75 |
+/* Detach and free up bptFile opened with bptFileAttach. */ |
|
76 |
+ |
|
77 |
+boolean bptFileFind(struct bptFile *bpt, void *key, int keySize, void *val, int valSize); |
|
78 |
+/* Find value associated with key. Return TRUE if it's found. |
|
79 |
+* Parameters: |
|
80 |
+* bpt - file handle returned by bptFileOpen |
|
81 |
+* key - pointer to key string |
|
82 |
+* keySize - size of key. Normally just strlen(key) |
|
83 |
+* val - pointer to where to put retrieved value |
|
84 |
+* valSize - size of memory buffer that will hold val. Should match bpt->valSize. |
|
85 |
+*/ |
|
86 |
+ |
|
87 |
+void bptFileTraverse(struct bptFile *bpt, void *context, |
|
88 |
+ void (*callback)(void *context, void *key, int keySize, void *val, int valSize) ); |
|
89 |
+/* Traverse bPlusTree on file, calling supplied callback function at each |
|
90 |
+ * leaf item. */ |
|
91 |
+ |
|
92 |
+ |
|
93 |
+void bptFileCreate( |
|
94 |
+ void *itemArray, /* Sorted array of things to index. */ |
|
95 |
+ int itemSize, /* Size of each element in array. */ |
|
96 |
+ bits64 itemCount, /* Number of elements in array. */ |
|
97 |
+ bits32 blockSize, /* B+ tree block size - # of children for each node. */ |
|
98 |
+ void (*fetchKey)(const void *va, char *keyBuf), /* Given item, copy key to keyBuf */ |
|
99 |
+ bits32 keySize, /* Size of key */ |
|
100 |
+ void* (*fetchVal)(const void *va), /* Given item, return pointer to value */ |
|
101 |
+ bits32 valSize, /* Size of value */ |
|
102 |
+ char *fileName); /* Name of output file. */ |
|
103 |
+/* Create a b+ tree index file from a sorted array. */ |
|
104 |
+ |
|
105 |
+void bptFileBulkIndexToOpenFile(void *itemArray, int itemSize, bits64 itemCount, bits32 blockSize, |
|
106 |
+ void (*fetchKey)(const void *va, char *keyBuf), bits32 keySize, |
|
107 |
+ void* (*fetchVal)(const void *va), bits32 valSize, FILE *f); |
|
108 |
+/* Create a b+ tree index from a sorted array, writing output starting at current position |
|
109 |
+ * of an already open file. See bptFileCreate for explanation of parameters. */ |
|
110 |
+ |
|
111 |
+#define bptFileHeaderSize 32 |
|
112 |
+#define bptBlockHeaderSize 4 |
|
113 |
+ |
|
114 |
+#endif /* BPLUSTREE_H */ |
0 | 115 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,130 @@ |
1 |
+#include "common.h" |
|
2 |
+#include "base64.h" |
|
3 |
+ |
|
4 |
+static char const rcsid[] = "$Id: base64.c,v 1.6 2008/09/17 18:00:47 galt Exp $"; |
|
5 |
+ |
|
6 |
+char *base64Encode(char *input, size_t inplen) |
|
7 |
+/* Use base64 to encode a string. Returns one long encoded |
|
8 |
+ * string which need to be freeMem'd. Note: big-endian algorithm. |
|
9 |
+ * For some applications you may need to break the base64 output |
|
10 |
+ * of this function into lines no longer than 76 chars. |
|
11 |
+ */ |
|
12 |
+{ |
|
13 |
+char b64[] = B64CHARS; |
|
14 |
+int words = (inplen+2)/3; |
|
15 |
+int remains = inplen % 3; |
|
16 |
+char *result = (char *)needMem(4*words+1); |
|
17 |
+size_t i=0, j=0; |
|
18 |
+int word = 0; |
|
19 |
+unsigned char *p = (unsigned char*) input; |
|
20 |
+/* p must be unsigned char*, because without "unsigned", |
|
21 |
+sign extend messes up last group outputted |
|
22 |
+when the value of the chars following last in input |
|
23 |
+happens to be char 0x80 or higher */ |
|
24 |
+for(i=1; i<=words; i++) |
|
25 |
+ { |
|
26 |
+ word = 0; |
|
27 |
+ word |= *p++; |
|
28 |
+ word <<= 8; |
|
29 |
+ word |= *p++; |
|
30 |
+ word <<= 8; |
|
31 |
+ word |= *p++; |
|
32 |
+ if (i==words && remains>0) |
|
33 |
+ { |
|
34 |
+ word &= 0x00FFFF00; |
|
35 |
+ if (remains==1) |
|
36 |
+ word &= 0x00FF0000; |
|
37 |
+ } |
|
38 |
+ result[j++]=b64[word >> 18 & 0x3F]; |
|
39 |
+ result[j++]=b64[word >> 12 & 0x3F]; |
|
40 |
+ result[j++]=b64[word >> 6 & 0x3F]; |
|
41 |
+ result[j++]=b64[word & 0x3F]; |
|
42 |
+ } |
|
43 |
+result[j] = 0; |
|
44 |
+if (remains >0) result[j-1] = '='; |
|
45 |
+if (remains==1) result[j-2] = '='; |
|
46 |
+return result; |
|
47 |
+} |
|
48 |
+ |
|
49 |
+ |
|
50 |
+boolean base64Validate(char *input) |
|
51 |
+/* Return true if input is valid base64. |
|
52 |
+ * Note that the input string is changed by |
|
53 |
+ * eraseWhiteSpace(). */ |
|
54 |
+{ |
|
55 |
+size_t i = 0, l = 0; |
|
56 |
+char *p = input; |
|
57 |
+boolean validB64 = TRUE; |
|
58 |
+ |
|
59 |
+/* remove whitespace which is unnecessary and */ |
|
60 |
+eraseWhiteSpace(input); |
|
61 |
+ |
|
62 |
+l = strlen(p); |
|
63 |
+for(i=0;i<l;i++) |
|
64 |
+ { |
|
65 |
+ char c = ' '; |
|
66 |
+ if (!strchr(B64CHARS,c=*p++)) |
|
67 |
+ { |
|
68 |
+ if (c != '=') |
|
69 |
+ { |
|
70 |
+ validB64 = FALSE; |
|
71 |
+ break; |
|
72 |
+ } |
|
73 |
+ } |
|
74 |
+ } |
|
75 |
+if (l%4) |
|
76 |
+ validB64 = FALSE; |
|
77 |
+return validB64; |
|
78 |
+} |
|
79 |
+ |
|
80 |
+char *base64Decode(char *input, size_t *returnSize) |
|
81 |
+/* Use base64 to decode a string. Return decoded |
|
82 |
+ * string which will be freeMem'd. Note: big-endian algorithm. |
|
83 |
+ * Call eraseWhiteSpace() and check for invalid input |
|
84 |
+ * before passing in input if needed. |
|
85 |
+ * Optionally set return size for use with binary data. |
|
86 |
+ */ |
|
87 |
+{ |
|
88 |
+static int *map=NULL; |
|
89 |
+char b64[] = B64CHARS; |
|
90 |
+size_t inplen = strlen(input); |
|
91 |
+int words = (inplen+3)/4; |
|
92 |
+char *result = (char *)needMem(3*words+1); |
|
93 |
+size_t i=0, j=0; |
|
94 |
+int word = 0; |
|
95 |
+char *p = input; |
|
96 |
+ |
|
97 |
+if (!map) |
|
98 |
+ { |
|
99 |
+ int i = 0; |
|
100 |
+ map = needMem(256*sizeof(int)); |
|
101 |
+ for (i = 0; i < 256; ++i) |
|
102 |
+ { |
|
103 |
+ map[i]=0; |
|
104 |
+ } |
|
105 |
+ for (i = 0; i < 64; ++i) |
|
106 |
+ { |
|
107 |
+ map[(int)b64[i]]=i; |
|
108 |
+ } |
|
109 |
+ } |
|
110 |
+for(i=0; i<words; i++) |
|
111 |
+ { |
|
112 |
+ word = 0; |
|
113 |
+ word |= map[(int)*p++]; |
|
114 |
+ word <<= 6; |
|
115 |
+ word |= map[(int)*p++]; |
|
116 |
+ word <<= 6; |
|
117 |
+ word |= map[(int)*p++]; |
|
118 |
+ word <<= 6; |
|
119 |
+ word |= map[(int)*p++]; |
|
120 |
+ result[j++]=word >> 16 & 0xFF; |
|
121 |
+ result[j++]=word >> 8 & 0xFF; |
|
122 |
+ result[j++]=word & 0xFF; |
|
123 |
+ } |
|
124 |
+result[j] = 0; |
|
125 |
+if (returnSize) |
|
126 |
+ *returnSize = j; |
|
127 |
+ |
|
128 |
+return result; |
|
129 |
+} |
|
130 |
+ |
0 | 131 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,30 @@ |
1 |
+/* Base64 encoding and decoding. |
|
2 |
+ * by Galt Barber */ |
|
3 |
+ |
|
4 |
+#ifndef BASE64_H |
|
5 |
+#define BASE64_H |
|
6 |
+ |
|
7 |
+#define B64CHARS "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" |
|
8 |
+ |
|
9 |
+char *base64Encode(char *input, size_t inplen); |
|
10 |
+/* Use base64 to encode a string. Returns one long encoded |
|
11 |
+ * string which need to be freeMem'd. Note: big-endian algorithm. |
|
12 |
+ * For some applications you may need to break the base64 output |
|
13 |
+ * of this function into lines no longer than 76 chars. |
|
14 |
+ */ |
|
15 |
+ |
|
16 |
+boolean base64Validate(char *input); |
|
17 |
+/* Return true if input is valid base64. |
|
18 |
+ * Note that the input string is changed by |
|
19 |
+ * eraseWhiteSpace(). */ |
|
20 |
+ |
|
21 |
+char *base64Decode(char *input, size_t *returnSize); |
|
22 |
+/* Use base64 to decode a string. Return decoded |
|
23 |
+ * string which will be freeMem'd. Note: big-endian algorithm. |
|
24 |
+ * Call eraseWhiteSpace() and check for invalid input |
|
25 |
+ * before passing in input if needed. |
|
26 |
+ * Optionally set retun size for use with binary data. |
|
27 |
+ */ |
|
28 |
+ |
|
29 |
+#endif /* BASE64_H */ |
|
30 |
+ |
0 | 31 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,352 @@ |
1 |
+/* bbiFile - Big Binary Indexed file. Stuff that's common between bigWig and bigBed. */ |
|
2 |
+ |
|
3 |
+#ifndef BBIFILE_H |
|
4 |
+#define BBIFILE_H |
|
5 |
+ |
|
6 |
+#include "cirTree.h" |
|
7 |
+ |
|
8 |
+/* bigWig/bigBed file structure: |
|
9 |
+ * fixedWidthHeader |
|
10 |
+ * magic# 4 bytes |
|
11 |
+ * version 2 bytes |
|
12 |
+ * zoomLevels 2 bytes |
|
13 |
+ * chromosomeTreeOffset 8 bytes |
|
14 |
+ * fullDataOffset 8 bytes |
|
15 |
+ * fullIndexOffset 8 bytes |
|
16 |
+ * fieldCount 2 bytes (for bigWig 0) |
|
17 |
+ * definedFieldCount 2 bytes (for bigWig 0) |
|
18 |
+ * autoSqlOffset 8 bytes (for bigWig 0) (0 if no autoSql information) |
|
19 |
+ * totalSummaryOffset 8 bytes (0 in earlier versions of file lacking totalSummary) |
|
20 |
+ * uncompressBufSize 4 bytes (Size of uncompression buffer. 0 if uncompressed.) |
|
21 |
+ * reserved 8 bytes (0 for now) |
|
22 |
+ * zoomHeaders there are zoomLevels number of these |
|
23 |
+ * reductionLevel 4 bytes |
|
24 |
+ * reserved 4 bytes |
|
25 |
+ * dataOffset 8 bytes |
|
26 |
+ * indexOffset 8 bytes |
|
27 |
+ * autoSql string (zero terminated - only present if autoSqlOffset non-zero) |
|
28 |
+ * totalSummary - summary of all data in file - only present if totalSummaryOffset non-zero |
|
29 |
+ * basesCovered 8 bytes |
|
30 |
+ * minVal 8 bytes float (for bigBed minimum depth of coverage) |
|
31 |
+ * maxVal 8 bytes float (for bigBed maximum depth of coverage) |
|
32 |
+ * sumData 8 bytes float (for bigBed sum of coverage) |
|
33 |
+ * sumSquared 8 bytes float (for bigBed sum of coverage squared) |
|
34 |
+ * chromosome b+ tree bPlusTree index |
|
35 |
+ * full data |
|
36 |
+ * sectionCount 8 bytes (item count for bigBeds) |
|
37 |
+ * section data section count sections, of three types (bed data for bigBeds) |
|
38 |
+ * full index cirTree index |
|
39 |
+ * zoom info one of these for each zoom level |
|
40 |
+ * zoom data |
|
41 |
+ * zoomCount 4 bytes |
|
42 |
+ * zoom data there are zoomCount of these items |
|
43 |
+ * chromId 4 bytes |
|
44 |
+ * chromStart 4 bytes |
|
45 |
+ * chromEnd 4 bytes |
|
46 |
+ * validCount 4 bytes |
|
47 |
+ * minVal 4 bytes float |
|
48 |
+ * maxVal 4 bytes float |
|
49 |
+ * sumData 4 bytes float |
|
50 |
+ * sumSquares 4 bytes float |
|