git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/rtracklayer@98880 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -1,11 +1,13 @@ |
1 | 1 |
/* bwgCreate - create big wig files. Implements write side of bwgInternal.h module. |
2 | 2 |
* See the comment in bwgInternal.h for a description of the file format. */ |
3 | 3 |
|
4 |
+/* Copyright (C) 2014 The Regents of the University of California |
|
5 |
+ * See README in this or parent directory for licensing information. */ |
|
6 |
+ |
|
4 | 7 |
#include "common.h" |
5 | 8 |
#include "linefile.h" |
6 | 9 |
#include "hash.h" |
7 |
-#include "localmem.h" |
|
8 |
-#include "errabort.h" |
|
10 |
+#include "errAbort.h" |
|
9 | 11 |
#include "sqlNum.h" |
10 | 12 |
#include "sig.h" |
11 | 13 |
#include "zlibFace.h" |
... | ... |
@@ -15,7 +17,6 @@ |
15 | 17 |
#include "bwgInternal.h" |
16 | 18 |
#include "bigWig.h" |
17 | 19 |
|
18 |
-static char const rcsid[] = "$Id: bwgCreate.c,v 1.27 2010/06/10 20:13:29 braney Exp $"; |
|
19 | 20 |
|
20 | 21 |
static int bwgBedGraphItemCmp(const void *va, const void *vb) |
21 | 22 |
/* Compare to sort based on query start. */ |
... | ... |
@@ -628,6 +629,78 @@ slFreeList(&uniqList); |
628 | 629 |
*retMaxChromNameSize = maxChromNameSize; |
629 | 630 |
} |
630 | 631 |
|
632 |
+static int bwgStrcmp (const void * A, const void * B) { |
|
633 |
+ char * stringA = *((char **) A); |
|
634 |
+ char * stringB = *((char **) B); |
|
635 |
+ return strcmp(stringA, stringB); |
|
636 |
+} |
|
637 |
+ |
|
638 |
+void bwgMakeAllChromInfo(struct bwgSection *sectionList, struct hash *chromSizeHash, |
|
639 |
+ int *retChromCount, struct bbiChromInfo **retChromArray, |
|
640 |
+ int *retMaxChromNameSize) |
|
641 |
+/* Fill in chromId field in sectionList. Return array of chromosome name/ids. |
|
642 |
+ * The chromSizeHash is keyed by name, and has int values. */ |
|
643 |
+{ |
|
644 |
+/* Build up list of unique chromosome names. */ |
|
645 |
+int maxChromNameSize = 0; |
|
646 |
+ |
|
647 |
+/* Get list of values */ |
|
648 |
+int chromCount = chromSizeHash->elCount; |
|
649 |
+char ** chromName, ** chromNames; |
|
650 |
+AllocArray(chromNames, chromCount); |
|
651 |
+chromName = chromNames; |
|
652 |
+struct hashEl* el; |
|
653 |
+struct hashCookie cookie = hashFirst(chromSizeHash); |
|
654 |
+for (el = hashNext(&cookie); el; el = hashNext(&cookie)) { |
|
655 |
+ *chromName = el->name; |
|
656 |
+ if (strlen(el->name) > maxChromNameSize) |
|
657 |
+ maxChromNameSize = strlen(el->name); |
|
658 |
+ chromName++; |
|
659 |
+} |
|
660 |
+qsort(chromNames, chromCount, sizeof(char *), bwgStrcmp); |
|
661 |
+ |
|
662 |
+/* Allocate and fill in results array. */ |
|
663 |
+struct bbiChromInfo *chromArray; |
|
664 |
+AllocArray(chromArray, chromCount); |
|
665 |
+int i; |
|
666 |
+for (i = 0; i < chromCount; ++i) |
|
667 |
+ { |
|
668 |
+ chromArray[i].name = chromNames[i]; |
|
669 |
+ chromArray[i].id = i; |
|
670 |
+ chromArray[i].size = hashIntVal(chromSizeHash, chromNames[i]); |
|
671 |
+ } |
|
672 |
+ |
|
673 |
+// Assign IDs to sections: |
|
674 |
+struct bwgSection *section; |
|
675 |
+char *name = ""; |
|
676 |
+bits32 chromId = 0; |
|
677 |
+for (section = sectionList; section != NULL; section = section->next) |
|
678 |
+ { |
|
679 |
+ if (!sameString(section->chrom, name)) |
|
680 |
+ { |
|
681 |
+ for (i = 0; i < chromCount; ++i) |
|
682 |
+ { |
|
683 |
+ if (sameString(section->chrom, chromArray[i].name)) |
|
684 |
+ { |
|
685 |
+ section->chromId = i; |
|
686 |
+ break; |
|
687 |
+ } |
|
688 |
+ } |
|
689 |
+ if (i == chromCount) |
|
690 |
+ errAbort("Could not find %s in list of chromosomes\n", section->chrom); |
|
691 |
+ chromId = section->chromId; |
|
692 |
+ name = section->chrom; |
|
693 |
+ } |
|
694 |
+ else |
|
695 |
+ section->chromId = chromId; |
|
696 |
+ } |
|
697 |
+ |
|
698 |
+/* Clean up, set return values and go home. */ |
|
699 |
+*retChromCount = chromCount; |
|
700 |
+*retChromArray = chromArray; |
|
701 |
+*retMaxChromNameSize = maxChromNameSize; |
|
702 |
+} |
|
703 |
+ |
|
631 | 704 |
int bwgAverageResolution(struct bwgSection *sectionList) |
632 | 705 |
/* Return the average resolution seen in sectionList. */ |
633 | 706 |
{ |
... | ... |
@@ -798,46 +871,18 @@ slReverse(&outList); |
798 | 871 |
return outList; |
799 | 872 |
} |
800 | 873 |
|
801 |
-void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, |
|
802 |
- int blockSize, int itemsPerSlot, boolean doCompress, char *fileName) |
|
803 |
-/* Create a bigWig file out of a sorted sectionList. */ |
|
804 |
-{ |
|
805 |
-bits64 sectionCount = slCount(sectionList); |
|
806 |
-FILE *f = mustOpen(fileName, "wb"); |
|
807 |
-bits32 sig = bigWigSig; |
|
808 |
-bits16 version = bbiCurrentVersion; |
|
809 |
-bits16 summaryCount = 0; |
|
810 |
-bits16 reserved16 = 0; |
|
811 |
-bits32 reserved32 = 0; |
|
812 |
-bits64 reserved64 = 0; |
|
813 |
-bits64 dataOffset = 0, dataOffsetPos; |
|
814 |
-bits64 indexOffset = 0, indexOffsetPos; |
|
815 |
-bits64 chromTreeOffset = 0, chromTreeOffsetPos; |
|
816 |
-bits64 totalSummaryOffset = 0, totalSummaryOffsetPos; |
|
817 |
-bits32 uncompressBufSize = 0; |
|
818 |
-bits64 uncompressBufSizePos; |
|
819 |
-struct bbiSummary *reduceSummaries[10]; |
|
820 |
-bits32 reductionAmounts[10]; |
|
821 |
-bits64 reductionDataOffsetPos[10]; |
|
822 |
-bits64 reductionDataOffsets[10]; |
|
823 |
-bits64 reductionIndexOffsets[10]; |
|
824 |
-int i; |
|
825 |
- |
|
826 |
-/* Figure out chromosome ID's. */ |
|
827 |
-struct bbiChromInfo *chromInfoArray; |
|
828 |
-int chromCount, maxChromNameSize; |
|
829 |
-bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize); |
|
830 |
- |
|
874 |
+static void bwgComputeDynamicSummaries(struct bwgSection *sectionList, struct bbiSummary ** reduceSummaries, bits16 * summaryCount, struct bbiChromInfo *chromInfoArray, int chromCount, bits32 * reductionAmounts, boolean doCompress) { |
|
831 | 875 |
/* Figure out initial summary level - starting with a summary 10 times the amount |
832 | 876 |
* of the smallest item. See if summarized data is smaller than half input data, if |
833 | 877 |
* not bump up reduction by a factor of 2 until it is, or until further summarying |
834 | 878 |
* yeilds no size reduction. */ |
879 |
+int i; |
|
835 | 880 |
int minRes = bwgAverageResolution(sectionList); |
836 | 881 |
int initialReduction = minRes*10; |
837 | 882 |
bits64 fullSize = bwgTotalSectionSize(sectionList); |
838 |
-bits64 maxReducedSize = fullSize/2; |
|
839 |
-struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL; |
|
840 | 883 |
bits64 lastSummarySize = 0, summarySize; |
884 |
+bits64 maxReducedSize = fullSize/2; |
|
885 |
+struct bbiSummary *summaryList = NULL; |
|
841 | 886 |
for (;;) |
842 | 887 |
{ |
843 | 888 |
summaryList = bwgReduceSectionList(sectionList, chromInfoArray, initialReduction); |
... | ... |
@@ -862,31 +907,92 @@ for (;;) |
862 | 907 |
else |
863 | 908 |
break; |
864 | 909 |
} |
865 |
-summaryCount = 1; |
|
866 |
-reduceSummaries[0] = firstSummaryList = summaryList; |
|
910 |
+*summaryCount = 1; |
|
911 |
+reduceSummaries[0] = summaryList; |
|
867 | 912 |
reductionAmounts[0] = initialReduction; |
868 | 913 |
|
869 | 914 |
/* Now calculate up to 10 levels of further summary. */ |
870 | 915 |
bits64 reduction = initialReduction; |
871 |
-for (i=0; i<ArraySize(reduceSummaries)-1; i++) |
|
916 |
+for (i=0; i<9; i++) |
|
872 | 917 |
{ |
873 | 918 |
reduction *= 4; |
874 | 919 |
if (reduction > 1000000000) |
875 | 920 |
break; |
876 |
- summaryList = bbiReduceSummaryList(reduceSummaries[summaryCount-1], chromInfoArray, |
|
921 |
+ summaryList = bbiReduceSummaryList(reduceSummaries[*summaryCount-1], chromInfoArray, |
|
877 | 922 |
reduction); |
878 | 923 |
summarySize = bbiTotalSummarySize(summaryList); |
879 | 924 |
if (summarySize != lastSummarySize) |
880 | 925 |
{ |
881 |
- reduceSummaries[summaryCount] = summaryList; |
|
882 |
- reductionAmounts[summaryCount] = reduction; |
|
883 |
- ++summaryCount; |
|
926 |
+ reduceSummaries[*summaryCount] = summaryList; |
|
927 |
+ reductionAmounts[*summaryCount] = reduction; |
|
928 |
+ ++(*summaryCount); |
|
884 | 929 |
} |
885 | 930 |
int summaryItemCount = slCount(summaryList); |
886 | 931 |
if (summaryItemCount <= chromCount) |
887 | 932 |
break; |
888 | 933 |
} |
889 | 934 |
|
935 |
+} |
|
936 |
+ |
|
937 |
+static void bwgComputeFixedSummaries(struct bwgSection * sectionList, struct bbiSummary ** reduceSummaries, bits16 * summaryCount, struct bbiChromInfo *chromInfoArray, bits32 * reductionAmounts) { |
|
938 |
+// Hack: pre-defining summary levels, set off Ensembl default zoom levels |
|
939 |
+// The last two values of this array were extrapolated following Jim's formula |
|
940 |
+int i; |
|
941 |
+#define REDUCTION_COUNT 10 |
|
942 |
+bits32 presetReductions[REDUCTION_COUNT] = {30, 65, 130, 260, 450, 648, 950, 1296, 4800, 19200}; |
|
943 |
+ |
|
944 |
+bits64 reduction = reductionAmounts[0] = presetReductions[0]; |
|
945 |
+reduceSummaries[0] = bwgReduceSectionList(sectionList, chromInfoArray, presetReductions[0]); |
|
946 |
+ |
|
947 |
+for (i=1; i<REDUCTION_COUNT; i++) |
|
948 |
+ { |
|
949 |
+ reduction = reductionAmounts[i] = presetReductions[i]; |
|
950 |
+ reduceSummaries[i] = bbiReduceSummaryList(reduceSummaries[i-1], chromInfoArray, |
|
951 |
+ reduction); |
|
952 |
+ } |
|
953 |
+ |
|
954 |
+*summaryCount = REDUCTION_COUNT; |
|
955 |
+} |
|
956 |
+ |
|
957 |
+void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, |
|
958 |
+ int blockSize, int itemsPerSlot, boolean doCompress, boolean keepAllChromosomes, |
|
959 |
+ boolean fixedSummaries, char *fileName) |
|
960 |
+/* Create a bigWig file out of a sorted sectionList. */ |
|
961 |
+{ |
|
962 |
+bits64 sectionCount = slCount(sectionList); |
|
963 |
+FILE *f = mustOpen(fileName, "wb"); |
|
964 |
+bits32 sig = bigWigSig; |
|
965 |
+bits16 version = bbiCurrentVersion; |
|
966 |
+bits16 summaryCount = 0; |
|
967 |
+bits16 reserved16 = 0; |
|
968 |
+bits32 reserved32 = 0; |
|
969 |
+bits64 reserved64 = 0; |
|
970 |
+bits64 dataOffset = 0, dataOffsetPos; |
|
971 |
+bits64 indexOffset = 0, indexOffsetPos; |
|
972 |
+bits64 chromTreeOffset = 0, chromTreeOffsetPos; |
|
973 |
+bits64 totalSummaryOffset = 0, totalSummaryOffsetPos; |
|
974 |
+bits32 uncompressBufSize = 0; |
|
975 |
+bits64 uncompressBufSizePos; |
|
976 |
+struct bbiSummary *reduceSummaries[10]; |
|
977 |
+bits32 reductionAmounts[10]; |
|
978 |
+bits64 reductionDataOffsetPos[10]; |
|
979 |
+bits64 reductionDataOffsets[10]; |
|
980 |
+bits64 reductionIndexOffsets[10]; |
|
981 |
+int i; |
|
982 |
+ |
|
983 |
+/* Figure out chromosome ID's. */ |
|
984 |
+struct bbiChromInfo *chromInfoArray; |
|
985 |
+int chromCount, maxChromNameSize; |
|
986 |
+if (keepAllChromosomes) |
|
987 |
+ bwgMakeAllChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize); |
|
988 |
+else |
|
989 |
+ bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize); |
|
990 |
+ |
|
991 |
+if (fixedSummaries) |
|
992 |
+ bwgComputeFixedSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, reductionAmounts); |
|
993 |
+else |
|
994 |
+ bwgComputeDynamicSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, chromCount, reductionAmounts, doCompress); |
|
995 |
+ |
|
890 | 996 |
/* Write fixed header. */ |
891 | 997 |
writeOne(f, sig); |
892 | 998 |
writeOne(f, version); |
... | ... |
@@ -904,8 +1010,8 @@ totalSummaryOffsetPos = ftell(f); |
904 | 1010 |
writeOne(f, totalSummaryOffset); |
905 | 1011 |
uncompressBufSizePos = ftell(f); |
906 | 1012 |
writeOne(f, uncompressBufSize); |
907 |
-for (i=0; i<2; ++i) |
|
908 |
- writeOne(f, reserved32); |
|
1013 |
+writeOne(f, reserved64); /* nameIndexOffset */ |
|
1014 |
+assert(ftell(f) == 64); |
|
909 | 1015 |
|
910 | 1016 |
/* Write summary headers */ |
911 | 1017 |
for (i=0; i<summaryCount; ++i) |
... | ... |
@@ -964,7 +1070,7 @@ for (i=0; i<summaryCount; ++i) |
964 | 1070 |
} |
965 | 1071 |
|
966 | 1072 |
/* Calculate summary */ |
967 |
-struct bbiSummary *sum = firstSummaryList; |
|
1073 |
+struct bbiSummary *sum = reduceSummaries[0]; |
|
968 | 1074 |
if (sum != NULL) |
969 | 1075 |
{ |
970 | 1076 |
totalSum.validCount = sum->validCount; |
... | ... |
@@ -1087,13 +1193,15 @@ for (section = sectionList; section != NULL; section = nextSection) |
1087 | 1193 |
return sectionList; |
1088 | 1194 |
} |
1089 | 1195 |
|
1090 |
-void bigWigFileCreate( |
|
1196 |
+void bigWigFileCreateEx( |
|
1091 | 1197 |
char *inName, /* Input file in ascii wiggle format. */ |
1092 | 1198 |
char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */ |
1093 | 1199 |
int blockSize, /* Number of items to bundle in r-tree. 1024 is good. */ |
1094 | 1200 |
int itemsPerSlot, /* Number of items in lowest level of tree. 512 is good. */ |
1095 | 1201 |
boolean clipDontDie, /* If TRUE then clip items off end of chrom rather than dying. */ |
1096 | 1202 |
boolean compress, /* If TRUE then compress data. */ |
1203 |
+ boolean keepAllChromosomes, /* If TRUE then store all chromosomes in chromosomal b-tree. */ |
|
1204 |
+ boolean fixedSummaries, /* If TRUE then impose fixed summary levels. */ |
|
1097 | 1205 |
char *outName) |
1098 | 1206 |
/* Convert ascii format wig file (in fixedStep, variableStep or bedGraph format) |
1099 | 1207 |
* to binary big wig format. */ |
... | ... |
@@ -1109,7 +1217,22 @@ struct lm *lm = lmInit(0); |
1109 | 1217 |
struct bwgSection *sectionList = bwgParseWig(inName, clipDontDie, chromSizeHash, itemsPerSlot, lm); |
1110 | 1218 |
if (sectionList == NULL) |
1111 | 1219 |
errAbort("%s is empty of data", inName); |
1112 |
-bwgCreate(sectionList, chromSizeHash, blockSize, itemsPerSlot, compress, outName); |
|
1220 |
+bwgCreate(sectionList, chromSizeHash, blockSize, itemsPerSlot, compress, keepAllChromosomes, fixedSummaries, outName); |
|
1113 | 1221 |
lmCleanup(&lm); |
1114 | 1222 |
} |
1115 | 1223 |
|
1224 |
+void bigWigFileCreate( |
|
1225 |
+ char *inName, /* Input file in ascii wiggle format. */ |
|
1226 |
+ char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */ |
|
1227 |
+ int blockSize, /* Number of items to bundle in r-tree. 1024 is good. */ |
|
1228 |
+ int itemsPerSlot, /* Number of items in lowest level of tree. 512 is good. */ |
|
1229 |
+ boolean clipDontDie, /* If TRUE then clip items off end of chrom rather than dying. */ |
|
1230 |
+ boolean compress, /* If TRUE then compress data. */ |
|
1231 |
+ char *outName) |
|
1232 |
+/* Convert ascii format wig file (in fixedStep, variableStep or bedGraph format) |
|
1233 |
+ * to binary big wig format. */ |
|
1234 |
+{ |
|
1235 |
+bigWigFileCreateEx( inName, chromSizes, blockSize, itemsPerSlot, clipDontDie, |
|
1236 |
+ compress, FALSE, FALSE, outName); |
|
1237 |
+} |
|
1238 |
+ |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/rtracklayer@57674 bc3139a8-67e5-0310-9ffc-ced21a209358
... | ... |
@@ -15,70 +15,7 @@ |
15 | 15 |
#include "bwgInternal.h" |
16 | 16 |
#include "bigWig.h" |
17 | 17 |
|
18 |
-static char const rcsid[] = "$Id: bwgCreate.c,v 1.22 2009/11/25 07:17:25 kent Exp $"; |
|
19 |
- |
|
20 |
-struct bwgBedGraphItem |
|
21 |
-/* An bedGraph-type item in a bwgSection. */ |
|
22 |
- { |
|
23 |
- struct bwgBedGraphItem *next; /* Next in list. */ |
|
24 |
- bits32 start,end; /* Range of chromosome covered. */ |
|
25 |
- float val; /* Value. */ |
|
26 |
- }; |
|
27 |
- |
|
28 |
-struct bwgVariableStepItem |
|
29 |
-/* An variableStep type item in a bwgSection. */ |
|
30 |
- { |
|
31 |
- struct bwgVariableStepItem *next; /* Next in list. */ |
|
32 |
- bits32 start; /* Start position in chromosome. */ |
|
33 |
- float val; /* Value. */ |
|
34 |
- }; |
|
35 |
- |
|
36 |
-struct bwgVariableStepPacked |
|
37 |
-/* An variableStep type item in a bwgSection. */ |
|
38 |
- { |
|
39 |
- bits32 start; /* Start position in chromosome. */ |
|
40 |
- float val; /* Value. */ |
|
41 |
- }; |
|
42 |
- |
|
43 |
-struct bwgFixedStepItem |
|
44 |
-/* An fixedStep type item in a bwgSection. */ |
|
45 |
- { |
|
46 |
- struct bwgFixedStepItem *next; /* Next in list. */ |
|
47 |
- float val; /* Value. */ |
|
48 |
- }; |
|
49 |
- |
|
50 |
-struct bwgFixedStepPacked |
|
51 |
-/* An fixedStep type item in a bwgSection. */ |
|
52 |
- { |
|
53 |
- float val; /* Value. */ |
|
54 |
- }; |
|
55 |
- |
|
56 |
-union bwgItem |
|
57 |
-/* Union of item pointers for all possible section types. */ |
|
58 |
- { |
|
59 |
- struct bwgBedGraphItem *bedGraphList; /* A linked list */ |
|
60 |
- struct bwgFixedStepPacked *fixedStepPacked; /* An array */ |
|
61 |
- struct bwgVariableStepPacked *variableStepPacked; /* An array */ |
|
62 |
- /* No packed format for bedGraph... */ |
|
63 |
- }; |
|
64 |
- |
|
65 |
-struct bwgSection |
|
66 |
-/* A section of a bigWig file - all on same chrom. This is a somewhat fat data |
|
67 |
- * structure used by the bigWig creation code. See also bwgSection for the |
|
68 |
- * structure returned by the bigWig reading code. */ |
|
69 |
- { |
|
70 |
- struct bwgSection *next; /* Next in list. */ |
|
71 |
- char *chrom; /* Chromosome name. */ |
|
72 |
- bits32 start,end; /* Range of chromosome covered. */ |
|
73 |
- enum bwgSectionType type; |
|
74 |
- union bwgItem items; /* List/array of items in this section. */ |
|
75 |
- bits32 itemStep; /* Step within item if applicable. */ |
|
76 |
- bits32 itemSpan; /* Item span if applicable. */ |
|
77 |
- bits16 itemCount; /* Number of items in section. */ |
|
78 |
- bits32 chromId; /* Unique small integer value for chromosome. */ |
|
79 |
- bits64 fileOffset; /* Offset of section in file. */ |
|
80 |
- }; |
|
81 |
- |
|
18 |
+static char const rcsid[] = "$Id: bwgCreate.c,v 1.27 2010/06/10 20:13:29 braney Exp $"; |
|
82 | 19 |
|
83 | 20 |
static int bwgBedGraphItemCmp(const void *va, const void *vb) |
84 | 21 |
/* Compare to sort based on query start. */ |
... | ... |
@@ -200,7 +137,7 @@ return bufSize; |
200 | 137 |
} |
201 | 138 |
|
202 | 139 |
|
203 |
-static int bwgSectionCmp(const void *va, const void *vb) |
|
140 |
+int bwgSectionCmp(const void *va, const void *vb) |
|
204 | 141 |
/* Compare to sort based on chrom,start,end. */ |
205 | 142 |
{ |
206 | 143 |
const struct bwgSection *a = *((struct bwgSection **)va); |
... | ... |
@@ -338,7 +275,7 @@ struct lm *lmLocal = lmInit(0); |
338 | 275 |
* adding values from single column to list. */ |
339 | 276 |
char *words[2]; |
340 | 277 |
char *line; |
341 |
-struct bwgVariableStepItem *item, *itemList = NULL; |
|
278 |
+struct bwgVariableStepItem *item, *nextItem, *itemList = NULL; |
|
342 | 279 |
int originalSectionSize = 0; |
343 | 280 |
while (lineFileNextReal(lf, &line)) |
344 | 281 |
{ |
... | ... |
@@ -372,6 +309,20 @@ while (lineFileNextReal(lf, &line)) |
372 | 309 |
} |
373 | 310 |
slSort(&itemList, bwgVariableStepItemCmp); |
374 | 311 |
|
312 |
+/* Make sure no overlap between items. */ |
|
313 |
+if (itemList != NULL) |
|
314 |
+ { |
|
315 |
+ item = itemList; |
|
316 |
+ for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next) |
|
317 |
+ { |
|
318 |
+ if (item->start + span > nextItem->start) |
|
319 |
+ errAbort("Overlap on %s between items starting at %d and %d.\n" |
|
320 |
+ "Please remove overlaps and try again", |
|
321 |
+ chrom, item->start, nextItem->start); |
|
322 |
+ item = nextItem; |
|
323 |
+ } |
|
324 |
+ } |
|
325 |
+ |
|
375 | 326 |
/* Break up into sections of no more than items-per-slot size. */ |
376 | 327 |
int sizeLeft = originalSectionSize; |
377 | 328 |
for (item = itemList; item != NULL; ) |
... | ... |
@@ -435,7 +386,7 @@ else |
435 | 386 |
errAbort("Unknown type %s\n", typeWord); |
436 | 387 |
|
437 | 388 |
/* Set up defaults for values we hope to parse out of rest of line. */ |
438 |
-int span = 1; |
|
389 |
+int span = 0; |
|
439 | 390 |
bits32 step = 0; |
440 | 391 |
bits32 start = 0; |
441 | 392 |
char *chrom = NULL; |
... | ... |
@@ -468,8 +419,8 @@ while ((varEqVal = nextWord(&initialLine)) != NULL) |
468 | 419 |
* rest of section. */ |
469 | 420 |
if (chrom == NULL) |
470 | 421 |
errAbort("Missing chrom= setting line %d of %s\n", lf->lineIx, lf->fileName); |
471 |
-bits32 chromSize = hashIntVal(chromSizeHash, chrom); |
|
472 |
-if (start >= chromSize) |
|
422 |
+bits32 chromSize = (chromSizeHash ? hashIntVal(chromSizeHash, chrom) : BIGNUM); |
|
423 |
+if (start > chromSize) |
|
473 | 424 |
{ |
474 | 425 |
warn("line %d of %s: chromosome %s has %u bases, but item starts at %u", |
475 | 426 |
lf->lineIx, lf->fileName, chrom, chromSize, start); |
... | ... |
@@ -482,6 +433,8 @@ if (type == bwgTypeFixedStep) |
482 | 433 |
errAbort("Missing start= setting line %d of %s\n", lf->lineIx, lf->fileName); |
483 | 434 |
if (step == 0) |
484 | 435 |
errAbort("Missing step= setting line %d of %s\n", lf->lineIx, lf->fileName); |
436 |
+ if (span == 0) |
|
437 |
+ span = step; |
|
485 | 438 |
parseFixedStepSection(lf, clipDontDie, lm, itemsPerSlot, |
486 | 439 |
chrom, chromSize, span, start-1, step, pSectionList); |
487 | 440 |
} |
... | ... |
@@ -491,6 +444,8 @@ else |
491 | 444 |
errAbort("Extra start= setting line %d of %s\n", lf->lineIx, lf->fileName); |
492 | 445 |
if (step != 0) |
493 | 446 |
errAbort("Extra step= setting line %d of %s\n", lf->lineIx, lf->fileName); |
447 |
+ if (span == 0) |
|
448 |
+ span = 1; |
|
494 | 449 |
parseVariableStepSection(lf, clipDontDie, lm, itemsPerSlot, |
495 | 450 |
chrom, chromSize, span, pSectionList); |
496 | 451 |
} |
... | ... |
@@ -546,7 +501,7 @@ while (lineFileNextReal(lf, &line)) |
546 | 501 |
{ |
547 | 502 |
lmAllocVar(chromHash->lm, chrom); |
548 | 503 |
hashAddSaveName(chromHash, chromName, chrom, &chrom->name); |
549 |
- chrom->size = hashIntVal(chromSizeHash, chromName); |
|
504 |
+ chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM); |
|
550 | 505 |
slAddHead(&chromList, chrom); |
551 | 506 |
} |
552 | 507 |
|
... | ... |
@@ -574,10 +529,22 @@ while (lineFileNextReal(lf, &line)) |
574 | 529 |
} |
575 | 530 |
slSort(&chromList, bedGraphChromCmpName); |
576 | 531 |
|
532 |
+/* Loop through each chromosome and output the item list, broken into sections |
|
533 |
+ * for that chrom. */ |
|
577 | 534 |
for (chrom = chromList; chrom != NULL; chrom = chrom->next) |
578 | 535 |
{ |
579 | 536 |
slSort(&chrom->itemList, bwgBedGraphItemCmp); |
580 | 537 |
|
538 |
+ /* Check to make sure no overlap between items. */ |
|
539 |
+ struct bwgBedGraphItem *item = chrom->itemList, *nextItem; |
|
540 |
+ for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next) |
|
541 |
+ { |
|
542 |
+ if (item->end > nextItem->start) |
|
543 |
+ errAbort("Overlap between %s %d %d and %s %d %d.\nPlease remove overlaps and try again", |
|
544 |
+ chrom->name, item->start, item->end, chrom->name, nextItem->start, nextItem->end); |
|
545 |
+ item = nextItem; |
|
546 |
+ } |
|
547 |
+ |
|
581 | 548 |
/* Break up into sections of no more than items-per-slot size. */ |
582 | 549 |
struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList; |
583 | 550 |
for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem) |
... | ... |
@@ -832,8 +799,7 @@ return outList; |
832 | 799 |
} |
833 | 800 |
|
834 | 801 |
void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, |
835 |
- int blockSize, int itemsPerSlot, boolean doCompress, |
|
836 |
- char *fileName) |
|
802 |
+ int blockSize, int itemsPerSlot, boolean doCompress, char *fileName) |
|
837 | 803 |
/* Create a bigWig file out of a sorted sectionList. */ |
838 | 804 |
{ |
839 | 805 |
bits64 sectionCount = slCount(sectionList); |
... | ... |
@@ -862,12 +828,12 @@ struct bbiChromInfo *chromInfoArray; |
862 | 828 |
int chromCount, maxChromNameSize; |
863 | 829 |
bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize); |
864 | 830 |
|
865 |
-/* Figure out initial summary level - starting with a summary 20 times the amount |
|
831 |
+/* Figure out initial summary level - starting with a summary 10 times the amount |
|
866 | 832 |
* of the smallest item. See if summarized data is smaller than half input data, if |
867 | 833 |
* not bump up reduction by a factor of 2 until it is, or until further summarying |
868 | 834 |
* yeilds no size reduction. */ |
869 | 835 |
int minRes = bwgAverageResolution(sectionList); |
870 |
-int initialReduction = minRes*20; |
|
836 |
+int initialReduction = minRes*10; |
|
871 | 837 |
bits64 fullSize = bwgTotalSectionSize(sectionList); |
872 | 838 |
bits64 maxReducedSize = fullSize/2; |
873 | 839 |
struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL; |
... | ... |
@@ -878,8 +844,7 @@ for (;;) |
878 | 844 |
bits64 summarySize = bbiTotalSummarySize(summaryList); |
879 | 845 |
if (doCompress) |
880 | 846 |
{ |
881 |
- summarySize *= 4; // Compensate for summary not compressing as well as primary data |
|
882 |
- initialReduction *= 4; |
|
847 |
+ summarySize *= 2; // Compensate for summary not compressing as well as primary data |
|
883 | 848 |
} |
884 | 849 |
if (summarySize >= maxReducedSize && summarySize != lastSummarySize) |
885 | 850 |
{ |
... | ... |
@@ -1049,18 +1014,30 @@ for (i=0; i<summaryCount; ++i) |
1049 | 1014 |
writeOne(f, reductionIndexOffsets[i]); |
1050 | 1015 |
} |
1051 | 1016 |
|
1017 |
+/* Write end signature. */ |
|
1018 |
+fseek(f, 0L, SEEK_END); |
|
1019 |
+writeOne(f, sig); |
|
1020 |
+ |
|
1052 | 1021 |
/* Clean up */ |
1053 | 1022 |
freez(&chromInfoArray); |
1054 | 1023 |
carefulClose(&f); |
1055 | 1024 |
} |
1056 | 1025 |
|
1057 |
-struct bwgSection *bwgParseWig(char *fileName, boolean clipDontDie, struct hash *chromSizeHash, |
|
1058 |
- int maxSectionSize, struct lm *lm) |
|
1026 |
+struct bwgSection *bwgParseWig( |
|
1027 |
+ char *fileName, /* Name of ascii wig file. */ |
|
1028 |
+ boolean clipDontDie, /* Skip items outside chromosome rather than aborting. */ |
|
1029 |
+ struct hash *chromSizeHash, /* If non-NULL items checked to be inside chromosome. */ |
|
1030 |
+ int maxSectionSize, /* Biggest size of a section. 100 - 100,000 is usual range. */ |
|
1031 |
+ struct lm *lm) /* Memory pool to allocate from. */ |
|
1059 | 1032 |
/* Parse out ascii wig file - allocating memory in lm. */ |
1060 | 1033 |
{ |
1061 | 1034 |
struct lineFile *lf = lineFileOpen(fileName, TRUE); |
1062 | 1035 |
char *line; |
1063 | 1036 |
struct bwgSection *sectionList = NULL; |
1037 |
+ |
|
1038 |
+/* remove initial browser and track lines */ |
|
1039 |
+lineFileRemoveInitialCustomTrackLines(lf); |
|
1040 |
+ |
|
1064 | 1041 |
while (lineFileNextReal(lf, &line)) |
1065 | 1042 |
{ |
1066 | 1043 |
verbose(2, "processing %s\n", line); |
... | ... |
@@ -1089,7 +1066,7 @@ while (lineFileNextReal(lf, &line)) |
1089 | 1066 |
} |
1090 | 1067 |
slSort(§ionList, bwgSectionCmp); |
1091 | 1068 |
|
1092 |
-/* Check for overlap. */ |
|
1069 |
+/* Check for overlap at section level. */ |
|
1093 | 1070 |
struct bwgSection *section, *nextSection; |
1094 | 1071 |
for (section = sectionList; section != NULL; section = nextSection) |
1095 | 1072 |
{ |
git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/rtracklayer@44644 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,1138 @@ |
1 |
+/* bwgCreate - create big wig files. Implements write side of bwgInternal.h module. |
|
2 |
+ * See the comment in bwgInternal.h for a description of the file format. */ |
|
3 |
+ |
|
4 |
+#include "common.h" |
|
5 |
+#include "linefile.h" |
|
6 |
+#include "hash.h" |
|
7 |
+#include "localmem.h" |
|
8 |
+#include "errabort.h" |
|
9 |
+#include "sqlNum.h" |
|
10 |
+#include "sig.h" |
|
11 |
+#include "zlibFace.h" |
|
12 |
+#include "bPlusTree.h" |
|
13 |
+#include "cirTree.h" |
|
14 |
+#include "bbiFile.h" |
|
15 |
+#include "bwgInternal.h" |
|
16 |
+#include "bigWig.h" |
|
17 |
+ |
|
18 |
+static char const rcsid[] = "$Id: bwgCreate.c,v 1.22 2009/11/25 07:17:25 kent Exp $"; |
|
19 |
+ |
|
20 |
+struct bwgBedGraphItem |
|
21 |
+/* An bedGraph-type item in a bwgSection. */ |
|
22 |
+ { |
|
23 |
+ struct bwgBedGraphItem *next; /* Next in list. */ |
|
24 |
+ bits32 start,end; /* Range of chromosome covered. */ |
|
25 |
+ float val; /* Value. */ |
|
26 |
+ }; |
|
27 |
+ |
|
28 |
+struct bwgVariableStepItem |
|
29 |
+/* An variableStep type item in a bwgSection. */ |
|
30 |
+ { |
|
31 |
+ struct bwgVariableStepItem *next; /* Next in list. */ |
|
32 |
+ bits32 start; /* Start position in chromosome. */ |
|
33 |
+ float val; /* Value. */ |
|
34 |
+ }; |
|
35 |
+ |
|
36 |
+struct bwgVariableStepPacked |
|
37 |
+/* An variableStep type item in a bwgSection. */ |
|
38 |
+ { |
|
39 |
+ bits32 start; /* Start position in chromosome. */ |
|
40 |
+ float val; /* Value. */ |
|
41 |
+ }; |
|
42 |
+ |
|
43 |
+struct bwgFixedStepItem |
|
44 |
+/* An fixedStep type item in a bwgSection. */ |
|
45 |
+ { |
|
46 |
+ struct bwgFixedStepItem *next; /* Next in list. */ |
|
47 |
+ float val; /* Value. */ |
|
48 |
+ }; |
|
49 |
+ |
|
50 |
+struct bwgFixedStepPacked |
|
51 |
+/* An fixedStep type item in a bwgSection. */ |
|
52 |
+ { |
|
53 |
+ float val; /* Value. */ |
|
54 |
+ }; |
|
55 |
+ |
|
56 |
+union bwgItem |
|
57 |
+/* Union of item pointers for all possible section types. */ |
|
58 |
+ { |
|
59 |
+ struct bwgBedGraphItem *bedGraphList; /* A linked list */ |
|
60 |
+ struct bwgFixedStepPacked *fixedStepPacked; /* An array */ |
|
61 |
+ struct bwgVariableStepPacked *variableStepPacked; /* An array */ |
|
62 |
+ /* No packed format for bedGraph... */ |
|
63 |
+ }; |
|
64 |
+ |
|
65 |
+struct bwgSection |
|
66 |
+/* A section of a bigWig file - all on same chrom. This is a somewhat fat data |
|
67 |
+ * structure used by the bigWig creation code. See also bwgSection for the |
|
68 |
+ * structure returned by the bigWig reading code. */ |
|
69 |
+ { |
|
70 |
+ struct bwgSection *next; /* Next in list. */ |
|
71 |
+ char *chrom; /* Chromosome name. */ |
|
72 |
+ bits32 start,end; /* Range of chromosome covered. */ |
|
73 |
+ enum bwgSectionType type; |
|
74 |
+ union bwgItem items; /* List/array of items in this section. */ |
|
75 |
+ bits32 itemStep; /* Step within item if applicable. */ |
|
76 |
+ bits32 itemSpan; /* Item span if applicable. */ |
|
77 |
+ bits16 itemCount; /* Number of items in section. */ |
|
78 |
+ bits32 chromId; /* Unique small integer value for chromosome. */ |
|
79 |
+ bits64 fileOffset; /* Offset of section in file. */ |
|
80 |
+ }; |
|
81 |
+ |
|
82 |
+ |
|
83 |
+static int bwgBedGraphItemCmp(const void *va, const void *vb) |
|
84 |
+/* Compare to sort based on query start. */ |
|
85 |
+{ |
|
86 |
+const struct bwgBedGraphItem *a = *((struct bwgBedGraphItem **)va); |
|
87 |
+const struct bwgBedGraphItem *b = *((struct bwgBedGraphItem **)vb); |
|
88 |
+int dif = (int)a->start - (int)b->start; |
|
89 |
+if (dif == 0) |
|
90 |
+ dif = (int)a->end - (int)b->end; |
|
91 |
+return dif; |
|
92 |
+} |
|
93 |
+ |
|
94 |
+static int bwgVariableStepItemCmp(const void *va, const void *vb) |
|
95 |
+/* Compare to sort based on query start. */ |
|
96 |
+{ |
|
97 |
+const struct bwgVariableStepItem *a = *((struct bwgVariableStepItem **)va); |
|
98 |
+const struct bwgVariableStepItem *b = *((struct bwgVariableStepItem **)vb); |
|
99 |
+return (int)a->start - (int)b->start; |
|
100 |
+} |
|
101 |
+ |
|
102 |
+void bwgDumpSummary(struct bbiSummary *sum, FILE *f) |
|
103 |
+/* Write out summary info to file. */ |
|
104 |
+{ |
|
105 |
+fprintf(f, "summary %d:%d-%d min=%f, max=%f, sum=%f, sumSquares=%f, validCount=%d, mean=%f\n", |
|
106 |
+ sum->chromId, sum->start, sum->end, sum->minVal, sum->maxVal, sum->sumData, |
|
107 |
+ sum->sumSquares, sum->validCount, sum->sumData/sum->validCount); |
|
108 |
+} |
|
109 |
+ |
|
110 |
+static int bwgSectionWrite(struct bwgSection *section, boolean doCompress, FILE *f) |
|
111 |
+/* Write out section to file, filling in section->fileOffset. */ |
|
112 |
+{ |
|
113 |
+UBYTE type = section->type; |
|
114 |
+UBYTE reserved8 = 0; |
|
115 |
+int itemSize; |
|
116 |
+switch (section->type) |
|
117 |
+ { |
|
118 |
+ case bwgTypeBedGraph: |
|
119 |
+ itemSize = 12; |
|
120 |
+ break; |
|
121 |
+ case bwgTypeVariableStep: |
|
122 |
+ itemSize = 8; |
|
123 |
+ break; |
|
124 |
+ case bwgTypeFixedStep: |
|
125 |
+ itemSize = 4; |
|
126 |
+ break; |
|
127 |
+ default: |
|
128 |
+ itemSize = 0; // Suppress compiler warning |
|
129 |
+ internalErr(); |
|
130 |
+ break; |
|
131 |
+ } |
|
132 |
+int fixedSize = sizeof(section->chromId) + sizeof(section->start) + sizeof(section->end) + |
|
133 |
+ sizeof(section->itemStep) + sizeof(section->itemSpan) + sizeof(type) + sizeof(reserved8) + |
|
134 |
+ sizeof(section->itemCount); |
|
135 |
+int bufSize = section->itemCount * itemSize + fixedSize; |
|
136 |
+char buf[bufSize]; |
|
137 |
+char *bufPt = buf; |
|
138 |
+ |
|
139 |
+section->fileOffset = ftell(f); |
|
140 |
+memWriteOne(&bufPt, section->chromId); |
|
141 |
+memWriteOne(&bufPt, section->start); |
|
142 |
+memWriteOne(&bufPt, section->end); |
|
143 |
+memWriteOne(&bufPt, section->itemStep); |
|
144 |
+memWriteOne(&bufPt, section->itemSpan); |
|
145 |
+memWriteOne(&bufPt, type); |
|
146 |
+memWriteOne(&bufPt, reserved8); |
|
147 |
+memWriteOne(&bufPt, section->itemCount); |
|
148 |
+ |
|
149 |
+int i; |
|
150 |
+switch (section->type) |
|
151 |
+ { |
|
152 |
+ case bwgTypeBedGraph: |
|
153 |
+ { |
|
154 |
+ struct bwgBedGraphItem *item = section->items.bedGraphList; |
|
155 |
+ for (item = section->items.bedGraphList; item != NULL; item = item->next) |
|
156 |
+ { |
|
157 |
+ memWriteOne(&bufPt, item->start); |
|
158 |
+ memWriteOne(&bufPt, item->end); |
|
159 |
+ memWriteOne(&bufPt, item->val); |
|
160 |
+ } |
|
161 |
+ break; |
|
162 |
+ } |
|
163 |
+ case bwgTypeVariableStep: |
|
164 |
+ { |
|
165 |
+ struct bwgVariableStepPacked *items = section->items.variableStepPacked; |
|
166 |
+ for (i=0; i<section->itemCount; ++i) |
|
167 |
+ { |
|
168 |
+ memWriteOne(&bufPt, items->start); |
|
169 |
+ memWriteOne(&bufPt, items->val); |
|
170 |
+ items += 1; |
|
171 |
+ } |
|
172 |
+ break; |
|
173 |
+ } |
|
174 |
+ case bwgTypeFixedStep: |
|
175 |
+ { |
|
176 |
+ struct bwgFixedStepPacked *items = section->items.fixedStepPacked; |
|
177 |
+ for (i=0; i<section->itemCount; ++i) |
|
178 |
+ { |
|
179 |
+ memWriteOne(&bufPt, items->val); |
|
180 |
+ items += 1; |
|
181 |
+ } |
|
182 |
+ break; |
|
183 |
+ } |
|
184 |
+ default: |
|
185 |
+ internalErr(); |
|
186 |
+ break; |
|
187 |
+ } |
|
188 |
+assert(bufSize == (bufPt - buf) ); |
|
189 |
+ |
|
190 |
+if (doCompress) |
|
191 |
+ { |
|
192 |
+ size_t maxCompSize = zCompBufSize(bufSize); |
|
193 |
+ char compBuf[maxCompSize]; |
|
194 |
+ int compSize = zCompress(buf, bufSize, compBuf, maxCompSize); |
|
195 |
+ mustWrite(f, compBuf, compSize); |
|
196 |
+ } |
|
197 |
+else |
|
198 |
+ mustWrite(f, buf, bufSize); |
|
199 |
+return bufSize; |
|
200 |
+} |
|
201 |
+ |
|
202 |
+ |
|
203 |
+static int bwgSectionCmp(const void *va, const void *vb) |
|
204 |
+/* Compare to sort based on chrom,start,end. */ |
|
205 |
+{ |
|
206 |
+const struct bwgSection *a = *((struct bwgSection **)va); |
|
207 |
+const struct bwgSection *b = *((struct bwgSection **)vb); |
|
208 |
+int dif = strcmp(a->chrom, b->chrom); |
|
209 |
+if (dif == 0) |
|
210 |
+ { |
|
211 |
+ dif = (int)a->start - (int)b->start; |
|
212 |
+ if (dif == 0) |
|
213 |
+ dif = (int)a->end - (int)b->end; |
|
214 |
+ } |
|
215 |
+return dif; |
|
216 |
+} |
|
217 |
+ |
|
218 |
+static struct cirTreeRange bwgSectionFetchKey(const void *va, void *context) |
|
219 |
+/* Fetch bwgSection key for r-tree */ |
|
220 |
+{ |
|
221 |
+struct cirTreeRange res; |
|
222 |
+const struct bwgSection *a = *((struct bwgSection **)va); |
|
223 |
+res.chromIx = a->chromId; |
|
224 |
+res.start = a->start; |
|
225 |
+res.end = a->end; |
|
226 |
+return res; |
|
227 |
+} |
|
228 |
+ |
|
229 |
+static bits64 bwgSectionFetchOffset(const void *va, void *context) |
|
230 |
+/* Fetch bwgSection file offset for r-tree */ |
|
231 |
+{ |
|
232 |
+const struct bwgSection *a = *((struct bwgSection **)va); |
|
233 |
+return a->fileOffset; |
|
234 |
+} |
|
235 |
+ |
|
236 |
+static boolean stepTypeLine(char *line) |
|
237 |
+/* Return TRUE if it's a variableStep or fixedStep line. */ |
|
238 |
+{ |
|
239 |
+return (startsWithWord("variableStep", line) || startsWithWord("fixedStep", line)); |
|
240 |
+} |
|
241 |
+ |
|
242 |
+static boolean steppedSectionEnd(char *line, int maxWords) |
|
243 |
+/* Return TRUE if line indicates the start of another section. */ |
|
244 |
+{ |
|
245 |
+int wordCount = chopByWhite(line, NULL, 5); |
|
246 |
+if (wordCount > maxWords) |
|
247 |
+ return TRUE; |
|
248 |
+return stepTypeLine(line); |
|
249 |
+} |
|
250 |
+ |
|
251 |
+static void parseFixedStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm, |
|
252 |
+ int itemsPerSlot, char *chrom, bits32 chromSize, bits32 span, bits32 sectionStart, |
|
253 |
+ bits32 step, struct bwgSection **pSectionList) |
|
254 |
+/* Read the single column data in section until get to end. */ |
|
255 |
+{ |
|
256 |
+struct lm *lmLocal = lmInit(0); |
|
257 |
+ |
|
258 |
+/* Stream through section until get to end of file or next section, |
|
259 |
+ * adding values from single column to list. */ |
|
260 |
+char *words[1]; |
|
261 |
+char *line; |
|
262 |
+struct bwgFixedStepItem *item, *itemList = NULL; |
|
263 |
+int originalSectionSize = 0; |
|
264 |
+bits32 sectionEnd = sectionStart; |
|
265 |
+while (lineFileNextReal(lf, &line)) |
|
266 |
+ { |
|
267 |
+ if (steppedSectionEnd(line, 1)) |
|
268 |
+ { |
|
269 |
+ lineFileReuse(lf); |
|
270 |
+ break; |
|
271 |
+ } |
|
272 |
+ chopLine(line, words); |
|
273 |
+ lmAllocVar(lmLocal, item); |
|
274 |
+ item->val = lineFileNeedDouble(lf, words, 0); |
|
275 |
+ if (sectionEnd + span > chromSize) |
|
276 |
+ { |
|
277 |
+ warn("line %d of %s: chromosome %s has %u bases, but item ends at %u", |
|
278 |
+ lf->lineIx, lf->fileName, chrom, chromSize, sectionEnd + span); |
|
279 |
+ if (!clipDontDie) |
|
280 |
+ noWarnAbort(); |
|
281 |
+ } |
|
282 |
+ else |
|
283 |
+ { |
|
284 |
+ slAddHead(&itemList, item); |
|
285 |
+ ++originalSectionSize; |
|
286 |
+ } |
|
287 |
+ sectionEnd += step; |
|
288 |
+ } |
|
289 |
+slReverse(&itemList); |
|
290 |
+ |
|
291 |
+/* Break up into sections of no more than items-per-slot size, and convert to packed format. */ |
|
292 |
+int sizeLeft = originalSectionSize; |
|
293 |
+for (item = itemList; item != NULL; ) |
|
294 |
+ { |
|
295 |
+ /* Figure out size of this section */ |
|
296 |
+ int sectionSize = sizeLeft; |
|
297 |
+ if (sectionSize > itemsPerSlot) |
|
298 |
+ sectionSize = itemsPerSlot; |
|
299 |
+ sizeLeft -= sectionSize; |
|
300 |
+ |
|
301 |
+ |
|
302 |
+ /* Allocate and fill in section. */ |
|
303 |
+ struct bwgSection *section; |
|
304 |
+ lmAllocVar(lm, section); |
|
305 |
+ section->chrom = chrom; |
|
306 |
+ section->start = sectionStart; |
|
307 |
+ sectionStart += sectionSize * step; |
|
308 |
+ section->end = sectionStart - step + span; |
|
309 |
+ section->type = bwgTypeFixedStep; |
|
310 |
+ section->itemStep = step; |
|
311 |
+ section->itemSpan = span; |
|
312 |
+ section->itemCount = sectionSize; |
|
313 |
+ |
|
314 |
+ /* Allocate array for data, and copy from list to array representation */ |
|
315 |
+ struct bwgFixedStepPacked *packed; /* An array */ |
|
316 |
+ section->items.fixedStepPacked = lmAllocArray(lm, packed, sectionSize); |
|
317 |
+ int i; |
|
318 |
+ for (i=0; i<sectionSize; ++i) |
|
319 |
+ { |
|
320 |
+ packed->val = item->val; |
|
321 |
+ item = item->next; |
|
322 |
+ ++packed; |
|
323 |
+ } |
|
324 |
+ |
|
325 |
+ /* Add section to list. */ |
|
326 |
+ slAddHead(pSectionList, section); |
|
327 |
+ } |
|
328 |
+lmCleanup(&lmLocal); |
|
329 |
+} |
|
330 |
+ |
|
331 |
+static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm, |
|
332 |
+ int itemsPerSlot, char *chrom, int chromSize, bits32 span, struct bwgSection **pSectionList) |
|
333 |
+/* Read the single column data in section until get to end. */ |
|
334 |
+{ |
|
335 |
+struct lm *lmLocal = lmInit(0); |
|
336 |
+ |
|
337 |
+/* Stream through section until get to end of file or next section, |
|
338 |
+ * adding values from single column to list. */ |
|
339 |
+char *words[2]; |
|
340 |
+char *line; |
|
341 |
+struct bwgVariableStepItem *item, *itemList = NULL; |
|
342 |
+int originalSectionSize = 0; |
|
343 |
+while (lineFileNextReal(lf, &line)) |
|
344 |
+ { |
|
345 |
+ if (steppedSectionEnd(line, 2)) |
|
346 |
+ { |
|
347 |
+ lineFileReuse(lf); |
|
348 |
+ break; |
|
349 |
+ } |
|
350 |
+ chopLine(line, words); |
|
351 |
+ lmAllocVar(lmLocal, item); |
|
352 |
+ int start = lineFileNeedNum(lf, words, 0); |
|
353 |
+ if (start <= 0) |
|
354 |
+ { |
|
355 |
+ errAbort("line %d of %s: zero or negative chromosome coordinate not allowed", |
|
356 |
+ lf->lineIx, lf->fileName); |
|
357 |
+ } |
|
358 |
+ item->start = start - 1; |
|
359 |
+ item->val = lineFileNeedDouble(lf, words, 1); |
|
360 |
+ if (item->start + span > chromSize) |
|
361 |
+ { |
|
362 |
+ warn("line %d of %s: chromosome %s has %u bases, but item ends at %u", |
|
363 |
+ lf->lineIx, lf->fileName, chrom, chromSize, item->start + span); |
|
364 |
+ if (!clipDontDie) |
|
365 |
+ noWarnAbort(); |
|
366 |
+ } |
|
367 |
+ else |
|
368 |
+ { |
|
369 |
+ slAddHead(&itemList, item); |
|
370 |
+ ++originalSectionSize; |
|
371 |
+ } |
|
372 |
+ } |
|
373 |
+slSort(&itemList, bwgVariableStepItemCmp); |
|
374 |
+ |
|
375 |
+/* Break up into sections of no more than items-per-slot size. */ |
|
376 |
+int sizeLeft = originalSectionSize; |
|
377 |
+for (item = itemList; item != NULL; ) |
|
378 |
+ { |
|
379 |
+ /* Figure out size of this section */ |
|
380 |
+ int sectionSize = sizeLeft; |
|
381 |
+ if (sectionSize > itemsPerSlot) |
|
382 |
+ sectionSize = itemsPerSlot; |
|
383 |
+ sizeLeft -= sectionSize; |
|
384 |
+ |
|
385 |
+ /* Convert from list to array representation. */ |
|
386 |
+ struct bwgVariableStepPacked *packed, *p; |
|
387 |
+ p = lmAllocArray(lm, packed, sectionSize); |
|
388 |
+ int i; |
|
389 |
+ for (i=0; i<sectionSize; ++i) |
|
390 |
+ { |
|
391 |
+ p->start = item->start; |
|
392 |
+ p->val = item->val; |
|
393 |
+ item = item->next; |
|
394 |
+ ++p; |
|
395 |
+ } |
|
396 |
+ |
|
397 |
+ /* Fill in section and add it to list. */ |
|
398 |
+ struct bwgSection *section; |
|
399 |
+ lmAllocVar(lm, section); |
|
400 |
+ section->chrom = chrom; |
|
401 |
+ section->start = packed[0].start; |
|
402 |
+ section->end = packed[sectionSize-1].start + span; |
|
403 |
+ section->type = bwgTypeVariableStep; |
|
404 |
+ section->items.variableStepPacked = packed; |
|
405 |
+ section->itemSpan = span; |
|
406 |
+ section->itemCount = sectionSize; |
|
407 |
+ slAddHead(pSectionList, section); |
|
408 |
+ } |
|
409 |
+lmCleanup(&lmLocal); |
|
410 |
+} |
|
411 |
+ |
|
412 |
+static unsigned parseUnsignedVal(struct lineFile *lf, char *var, char *val) |
|
413 |
+/* Return val as an integer, printing error message if it's not. */ |
|
414 |
+{ |
|
415 |
+char c = val[0]; |
|
416 |
+if (!isdigit(c)) |
|
417 |
+ errAbort("Expecting numerical value for %s, got %s, line %d of %s", |
|
418 |
+ var, val, lf->lineIx, lf->fileName); |
|
419 |
+return sqlUnsigned(val); |
|
420 |
+} |
|
421 |
+ |
|
422 |
+static void parseSteppedSection(struct lineFile *lf, boolean clipDontDie, |
|
423 |
+ struct hash *chromSizeHash, char *initialLine, |
|
424 |
+ struct lm *lm, int itemsPerSlot, struct bwgSection **pSectionList) |
|
425 |
+/* Parse out a variableStep or fixedStep section and add it to list, breaking it up as need be. */ |
|
426 |
+{ |
|
427 |
+/* Parse out first word of initial line and make sure it is something we recognize. */ |
|
428 |
+char *typeWord = nextWord(&initialLine); |
|
429 |
+enum bwgSectionType type = bwgTypeFixedStep; |
|
430 |
+if (sameString(typeWord, "variableStep")) |
|
431 |
+ type = bwgTypeVariableStep; |
|
432 |
+else if (sameString(typeWord, "fixedStep")) |
|
433 |
+ type = bwgTypeFixedStep; |
|
434 |
+else |
|
435 |
+ errAbort("Unknown type %s\n", typeWord); |
|
436 |
+ |
|
437 |
+/* Set up defaults for values we hope to parse out of rest of line. */ |
|
438 |
+int span = 1; |
|
439 |
+bits32 step = 0; |
|
440 |
+bits32 start = 0; |
|
441 |
+char *chrom = NULL; |
|
442 |
+ |
|
443 |
+/* Parse out var=val pairs. */ |
|
444 |
+char *varEqVal; |
|
445 |
+while ((varEqVal = nextWord(&initialLine)) != NULL) |
|
446 |
+ { |
|
447 |
+ char *wordPairs[2]; |
|
448 |
+ int wc = chopByChar(varEqVal, '=', wordPairs, 2); |
|
449 |
+ if (wc != 2) |
|
450 |
+ errAbort("strange var=val pair line %d of %s", lf->lineIx, lf->fileName); |
|
451 |
+ char *var = wordPairs[0]; |
|
452 |
+ char *val = wordPairs[1]; |
|
453 |
+ if (sameString(var, "chrom")) |
|
454 |
+ chrom = cloneString(val); |
|
455 |
+ else if (sameString(var, "span")) |
|
456 |
+ span = parseUnsignedVal(lf, var, val); |
|
457 |
+ else if (sameString(var, "step")) |
|
458 |
+ step = parseUnsignedVal(lf, var, val); |
|
459 |
+ else if (sameString(var, "start")) |
|
460 |
+ { |
|
461 |
+ start = parseUnsignedVal(lf, var, val); |
|
462 |
+ } |
|
463 |
+ else |
|
464 |
+ errAbort("Unknown setting %s=%s line %d of %s", var, val, lf->lineIx, lf->fileName); |
|
465 |
+ } |
|
466 |
+ |
|
467 |
+/* Check that we have all that are required and no more, and call type-specific routine to parse |
|
468 |
+ * rest of section. */ |
|
469 |
+if (chrom == NULL) |
|
470 |
+ errAbort("Missing chrom= setting line %d of %s\n", lf->lineIx, lf->fileName); |
|
471 |
+bits32 chromSize = hashIntVal(chromSizeHash, chrom); |
|
472 |
+if (start >= chromSize) |
|
473 |
+ { |
|
474 |
+ warn("line %d of %s: chromosome %s has %u bases, but item starts at %u", |
|
475 |
+ lf->lineIx, lf->fileName, chrom, chromSize, start); |
|
476 |
+ if (!clipDontDie) |
|
477 |
+ noWarnAbort(); |
|
478 |
+ } |
|
479 |
+if (type == bwgTypeFixedStep) |
|
480 |
+ { |
|
481 |
+ if (start == 0) |
|
482 |
+ errAbort("Missing start= setting line %d of %s\n", lf->lineIx, lf->fileName); |
|
483 |
+ if (step == 0) |
|
484 |
+ errAbort("Missing step= setting line %d of %s\n", lf->lineIx, lf->fileName); |
|
485 |
+ parseFixedStepSection(lf, clipDontDie, lm, itemsPerSlot, |
|
486 |
+ chrom, chromSize, span, start-1, step, pSectionList); |
|
487 |
+ } |
|
488 |
+else |
|
489 |
+ { |
|
490 |
+ if (start != 0) |
|
491 |
+ errAbort("Extra start= setting line %d of %s\n", lf->lineIx, lf->fileName); |
|
492 |
+ if (step != 0) |
|
493 |
+ errAbort("Extra step= setting line %d of %s\n", lf->lineIx, lf->fileName); |
|
494 |
+ parseVariableStepSection(lf, clipDontDie, lm, itemsPerSlot, |
|
495 |
+ chrom, chromSize, span, pSectionList); |
|
496 |
+ } |
|
497 |
+} |
|
498 |
+ |
|
499 |
+struct bedGraphChrom |
|
500 |
+/* A chromosome in bed graph format. */ |
|
501 |
+ { |
|
502 |
+ struct bedGraphChrom *next; /* Next in list. */ |
|
503 |
+ char *name; /* Chromosome name - not allocated here. */ |
|
504 |
+ bits32 size; /* Chromosome size. */ |
|
505 |
+ struct bwgBedGraphItem *itemList; /* List of items. */ |
|
506 |
+ }; |
|
507 |
+ |
|
508 |
+static int bedGraphChromCmpName(const void *va, const void *vb) |
|
509 |
+/* Compare to sort based on query start. */ |
|
510 |
+{ |
|
511 |
+const struct bedGraphChrom *a = *((struct bedGraphChrom **)va); |
|
512 |
+const struct bedGraphChrom *b = *((struct bedGraphChrom **)vb); |
|
513 |
+return strcmp(a->name, b->name); |
|
514 |
+} |
|
515 |
+ |
|
516 |
+static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie, |
|
517 |
+ struct hash *chromSizeHash, struct lm *lm, |
|
518 |
+ int itemsPerSlot, struct bwgSection **pSectionList) |
|
519 |
+/* Parse out bedGraph section until we get to something that is not in bedGraph format. */ |
|
520 |
+{ |
|
521 |
+/* Set up hash and list to store chromosomes. */ |
|
522 |
+struct hash *chromHash = hashNew(0); |
|
523 |
+struct bedGraphChrom *chrom, *chromList = NULL; |
|
524 |
+ |
|
525 |
+/* Collect lines in items on appropriate chromosomes. */ |
|
526 |
+struct bwgBedGraphItem *item; |
|
527 |
+char *line; |
|
528 |
+while (lineFileNextReal(lf, &line)) |
|
529 |
+ { |
|
530 |
+ /* Check for end of section. */ |
|
531 |
+ if (stepTypeLine(line)) |
|
532 |
+ { |
|
533 |
+ lineFileReuse(lf); |
|
534 |
+ break; |
|
535 |
+ } |
|
536 |
+ |
|
537 |
+ /* Parse out our line and make sure it has exactly 4 columns. */ |
|
538 |
+ char *words[5]; |
|
539 |
+ int wordCount = chopLine(line, words); |
|
540 |
+ lineFileExpectWords(lf, 4, wordCount); |
|
541 |
+ |
|
542 |
+ /* Get chromosome. */ |
|
543 |
+ char *chromName = words[0]; |
|
544 |
+ chrom = hashFindVal(chromHash, chromName); |
|
545 |
+ if (chrom == NULL) |
|
546 |
+ { |
|
547 |
+ lmAllocVar(chromHash->lm, chrom); |
|
548 |
+ hashAddSaveName(chromHash, chromName, chrom, &chrom->name); |
|
549 |
+ chrom->size = hashIntVal(chromSizeHash, chromName); |
|
550 |
+ slAddHead(&chromList, chrom); |
|
551 |
+ } |
|
552 |
+ |
|
553 |
+ /* Convert to item and add to chromosome list. */ |
|
554 |
+ lmAllocVar(lm, item); |
|
555 |
+ item->start = lineFileNeedNum(lf, words, 1); |
|
556 |
+ item->end = lineFileNeedNum(lf, words, 2); |
|
557 |
+ item->val = lineFileNeedDouble(lf, words, 3); |
|
558 |
+ |
|
559 |
+ /* Do sanity checking on coordinates. */ |
|
560 |
+ if (item->start > item->end) |
|
561 |
+ errAbort("bedGraph error: start (%u) after end line (%u) %d of %s.", |
|
562 |
+ item->start, item->end, lf->lineIx, lf->fileName); |
|
563 |
+ if (item->end > chrom->size) |
|
564 |
+ { |
|
565 |
+ warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u", |
|
566 |
+ lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end); |
|
567 |
+ if (!clipDontDie) |
|
568 |
+ noWarnAbort(); |
|
569 |
+ } |
|
570 |
+ else |
|
571 |
+ { |
|
572 |
+ slAddHead(&chrom->itemList, item); |
|
573 |
+ } |
|
574 |
+ } |
|
575 |
+slSort(&chromList, bedGraphChromCmpName); |
|
576 |
+ |
|
577 |
+for (chrom = chromList; chrom != NULL; chrom = chrom->next) |
|
578 |
+ { |
|
579 |
+ slSort(&chrom->itemList, bwgBedGraphItemCmp); |
|
580 |
+ |
|
581 |
+ /* Break up into sections of no more than items-per-slot size. */ |
|
582 |
+ struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList; |
|
583 |
+ for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem) |
|
584 |
+ { |
|
585 |
+ /* Find end item of this section, and start item for next section. |
|
586 |
+ * Terminate list at end item. */ |
|
587 |
+ int sectionSize = 0; |
|
588 |
+ int i; |
|
589 |
+ endItem = startItem; |
|
590 |
+ for (i=0; i<itemsPerSlot; ++i) |
|
591 |
+ { |
|
592 |
+ if (nextStartItem == NULL) |
|
593 |
+ break; |
|
594 |
+ endItem = nextStartItem; |
|
595 |
+ nextStartItem = nextStartItem->next; |
|
596 |
+ ++sectionSize; |
|
597 |
+ } |
|
598 |
+ endItem->next = NULL; |
|
599 |
+ |
|
600 |
+ /* Fill in section and add it to section list. */ |
|
601 |
+ struct bwgSection *section; |
|
602 |
+ lmAllocVar(lm, section); |
|
603 |
+ section->chrom = cloneString(chrom->name); |
|
604 |
+ section->start = startItem->start; |
|
605 |
+ section->end = endItem->end; |
|
606 |
+ section->type = bwgTypeBedGraph; |
|
607 |
+ section->items.bedGraphList = startItem; |
|
608 |
+ section->itemCount = sectionSize; |
|
609 |
+ slAddHead(pSectionList, section); |
|
610 |
+ } |
|
611 |
+ } |
|
612 |
+ |
|
613 |
+/* Free up hash, no longer needed. Free's chromList as a side effect since chromList is in |
|
614 |
+ * hash's memory. */ |
|
615 |
+hashFree(&chromHash); |
|
616 |
+chromList = NULL; |
|
617 |
+} |
|
618 |
+ |
|
619 |
+void bwgMakeChromInfo(struct bwgSection *sectionList, struct hash *chromSizeHash, |
|
620 |
+ int *retChromCount, struct bbiChromInfo **retChromArray, |
|
621 |
+ int *retMaxChromNameSize) |
|
622 |
+/* Fill in chromId field in sectionList. Return array of chromosome name/ids. |
|
623 |
+ * The chromSizeHash is keyed by name, and has int values. */ |
|
624 |
+{ |
|
625 |
+/* Build up list of unique chromosome names. */ |
|
626 |
+struct bwgSection *section; |
|
627 |
+char *chromName = ""; |
|
628 |
+int chromCount = 0; |
|
629 |
+int maxChromNameSize = 0; |
|
630 |
+struct slRef *uniq, *uniqList = NULL; |
|
631 |
+for (section = sectionList; section != NULL; section = section->next) |
|
632 |
+ { |
|
633 |
+ if (!sameString(section->chrom, chromName)) |
|
634 |
+ { |
|
635 |
+ chromName = section->chrom; |
|
636 |
+ refAdd(&uniqList, chromName); |
|
637 |
+ ++chromCount; |
|
638 |
+ int len = strlen(chromName); |
|
639 |
+ if (len > maxChromNameSize) |
|
640 |
+ maxChromNameSize = len; |
|
641 |
+ } |
|
642 |
+ section->chromId = chromCount-1; |
|
643 |
+ } |
|
644 |
+slReverse(&uniqList); |
|
645 |
+ |
|
646 |
+/* Allocate and fill in results array. */ |
|
647 |
+struct bbiChromInfo *chromArray; |
|
648 |
+AllocArray(chromArray, chromCount); |
|
649 |
+int i; |
|
650 |
+for (i = 0, uniq = uniqList; i < chromCount; ++i, uniq = uniq->next) |
|
651 |
+ { |
|
652 |
+ chromArray[i].name = uniq->val; |
|
653 |
+ chromArray[i].id = i; |
|
654 |
+ chromArray[i].size = hashIntVal(chromSizeHash, uniq->val); |
|
655 |
+ } |
|
656 |
+ |
|
657 |
+/* Clean up, set return values and go home. */ |
|
658 |
+slFreeList(&uniqList); |
|
659 |
+*retChromCount = chromCount; |
|
660 |
+*retChromArray = chromArray; |
|
661 |
+*retMaxChromNameSize = maxChromNameSize; |
|
662 |
+} |
|
663 |
+ |
|
664 |
+int bwgAverageResolution(struct bwgSection *sectionList) |
|
665 |
+/* Return the average resolution seen in sectionList. */ |
|
666 |
+{ |
|
667 |
+if (sectionList == NULL) |
|
668 |
+ return 1; |
|
669 |
+bits64 totalRes = 0; |
|
670 |
+bits32 sectionCount = 0; |
|
671 |
+struct bwgSection *section; |
|
672 |
+int i; |
|
673 |
+for (section = sectionList; section != NULL; section = section->next) |
|
674 |
+ { |
|
675 |
+ int sectionRes = 0; |
|
676 |
+ switch (section->type) |
|
677 |
+ { |
|
678 |
+ case bwgTypeBedGraph: |
|
679 |
+ { |
|
680 |
+ struct bwgBedGraphItem *item; |
|
681 |
+ sectionRes = BIGNUM; |
|
682 |
+ for (item = section->items.bedGraphList; item != NULL; item = item->next) |
|
683 |
+ { |
|
684 |
+ int size = item->end - item->start; |
|
685 |
+ if (sectionRes > size) |
|
686 |
+ sectionRes = size; |
|
687 |
+ } |
|
688 |
+ break; |
|
689 |
+ } |
|
690 |
+ case bwgTypeVariableStep: |
|
691 |
+ { |
|
692 |
+ struct bwgVariableStepPacked *items = section->items.variableStepPacked, *prev; |
|
693 |
+ bits32 smallestGap = BIGNUM; |
|
694 |
+ for (i=1; i<section->itemCount; ++i) |
|
695 |
+ { |
|
696 |
+ prev = items; |
|
697 |
+ items += 1; |
|
698 |
+ bits32 gap = items->start - prev->start; |
|
699 |
+ if (smallestGap > gap) |
|
700 |
+ smallestGap = gap; |
|
701 |
+ } |
|
702 |
+ if (smallestGap != BIGNUM) |
|
703 |
+ sectionRes = smallestGap; |
|
704 |
+ else |
|
705 |
+ sectionRes = section->itemSpan; |
|
706 |
+ break; |
|
707 |
+ } |
|
708 |
+ case bwgTypeFixedStep: |
|
709 |
+ { |
|
710 |
+ sectionRes = section->itemStep; |
|
711 |
+ break; |
|
712 |
+ } |
|
713 |
+ default: |
|
714 |
+ internalErr(); |
|
715 |
+ break; |
|
716 |
+ } |
|
717 |
+ totalRes += sectionRes; |
|
718 |
+ ++sectionCount; |
|
719 |
+ } |
|
720 |
+return (totalRes + sectionCount/2)/sectionCount; |
|
721 |
+} |
|
722 |
+ |
|
723 |
+#define bwgSectionHeaderSize 24 /* Size of section header in file. */ |
|
724 |
+ |
|
725 |
+static int bwgItemSize(enum bwgSectionType type) |
|
726 |
+/* Return size of an item inside a particular section. */ |
|
727 |
+{ |
|
728 |
+switch (type) |
|
729 |
+ { |
|
730 |
+ case bwgTypeBedGraph: |
|
731 |
+ return 2*sizeof(bits32) + sizeof(float); |
|
732 |
+ break; |
|
733 |
+ case bwgTypeVariableStep: |
|
734 |
+ return sizeof(bits32) + sizeof(float); |
|
735 |
+ break; |
|
736 |
+ case bwgTypeFixedStep: |
|
737 |
+ return sizeof(float); |
|
738 |
+ break; |
|
739 |
+ default: |
|
740 |
+ internalErr(); |
|
741 |
+ return 0; |
|
742 |
+ } |
|
743 |
+} |
|
744 |
+ |
|
745 |
+static int bwgSectionSize(struct bwgSection *section) |
|
746 |
+/* Return size (on disk) of section. */ |
|
747 |
+{ |
|
748 |
+return bwgSectionHeaderSize + bwgItemSize(section->type) * section->itemCount; |
|
749 |
+} |
|
750 |
+ |
|
751 |
+static bits64 bwgTotalSectionSize(struct bwgSection *sectionList) |
|
752 |
+/* Return total size of all sections. */ |
|
753 |
+{ |
|
754 |
+bits64 total = 0; |
|
755 |
+struct bwgSection *section; |
|
756 |
+for (section = sectionList; section != NULL; section = section->next) |
|
757 |
+ total += bwgSectionSize(section); |
|
758 |
+return total; |
|
759 |
+} |
|
760 |
+ |
|
761 |
+static void bwgReduceBedGraph(struct bwgSection *section, bits32 chromSize, int reduction, |
|
762 |
+ struct bbiSummary **pOutList) |
|
763 |
+/*Reduce a bedGraph section onto outList. */ |
|
764 |
+{ |
|
765 |
+struct bwgBedGraphItem *item = section->items.bedGraphList; |
|
766 |
+for (item = section->items.bedGraphList; item != NULL; item = item->next) |
|
767 |
+ { |
|
768 |
+ bbiAddRangeToSummary(section->chromId, chromSize, item->start, item->end, |
|
769 |
+ item->val, reduction, pOutList); |
|
770 |
+ } |
|
771 |
+} |
|
772 |
+ |
|
773 |
+static void bwgReduceVariableStep(struct bwgSection *section, bits32 chromSize, int reduction, |
|
774 |
+ struct bbiSummary **pOutList) |
|
775 |
+/*Reduce a variableStep section onto outList. */ |
|
776 |
+{ |
|
777 |
+struct bwgVariableStepPacked *items = section->items.variableStepPacked; |
|
778 |
+int i; |
|
779 |
+for (i=0; i<section->itemCount; ++i) |
|
780 |
+ { |
|
781 |
+ bbiAddRangeToSummary(section->chromId, chromSize, |
|
782 |
+ items->start, items->start + section->itemSpan, items->val, reduction, pOutList); |
|
783 |
+ items += 1; |
|
784 |
+ } |
|
785 |
+} |
|
786 |
+ |
|
787 |
+static void bwgReduceFixedStep(struct bwgSection *section, bits32 chromSize, int reduction, |
|
788 |
+ struct bbiSummary **pOutList) |
|
789 |
+/*Reduce a fixedStep section onto outList. */ |
|
790 |
+{ |
|
791 |
+struct bwgFixedStepPacked *items = section->items.fixedStepPacked; |
|
792 |
+int start = section->start; |
|
793 |
+int i; |
|
794 |
+for (i=0; i<section->itemCount; ++i) |
|
795 |
+ { |
|
796 |
+ bbiAddRangeToSummary(section->chromId, chromSize, start, start + section->itemSpan, items->val, |
|
797 |
+ reduction, pOutList); |
|
798 |
+ start += section->itemStep; |
|
799 |
+ items += 1; |
|
800 |
+ } |
|
801 |
+} |
|
802 |
+ |
|
803 |
+struct bbiSummary *bwgReduceSectionList(struct bwgSection *sectionList, |
|
804 |
+ struct bbiChromInfo *chromInfoArray, int reduction) |
|
805 |
+/* Return summary of section list reduced by given amount. */ |
|
806 |
+{ |
|
807 |
+struct bbiSummary *outList = NULL; |
|
808 |
+struct bwgSection *section = NULL; |
|
809 |
+ |
|
810 |
+/* Loop through input section list reducing into outList. */ |
|
811 |
+for (section = sectionList; section != NULL; section = section->next) |
|
812 |
+ { |
|
813 |
+ bits32 chromSize = chromInfoArray[section->chromId].size; |
|
814 |
+ switch (section->type) |
|
815 |
+ { |
|
816 |
+ case bwgTypeBedGraph: |
|
817 |
+ bwgReduceBedGraph(section, chromSize, reduction, &outList); |
|
818 |
+ break; |
|
819 |
+ case bwgTypeVariableStep: |
|
820 |
+ bwgReduceVariableStep(section, chromSize, reduction, &outList); |
|
821 |
+ break; |
|
822 |
+ case bwgTypeFixedStep: |
|
823 |
+ bwgReduceFixedStep(section, chromSize, reduction, &outList); |
|
824 |
+ break; |
|
825 |
+ default: |
|
826 |
+ internalErr(); |
|
827 |
+ return 0; |
|
828 |
+ } |
|
829 |
+ } |
|
830 |
+slReverse(&outList); |
|
831 |
+return outList; |
|
832 |
+} |
|
833 |
+ |
|
834 |
+void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, |
|
835 |
+ int blockSize, int itemsPerSlot, boolean doCompress, |
|
836 |
+ char *fileName) |
|
837 |
+/* Create a bigWig file out of a sorted sectionList. */ |
|