Browse code

update Kent library

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/rtracklayer@98880 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 29/01/2015 23:07:25
Showing1 changed files
... ...
@@ -1,11 +1,13 @@
1 1
 /* bwgCreate - create big wig files.  Implements write side of bwgInternal.h module. 
2 2
  * See the comment in bwgInternal.h for a description of the file format. */
3 3
 
4
+/* Copyright (C) 2014 The Regents of the University of California 
5
+ * See README in this or parent directory for licensing information. */
6
+
4 7
 #include "common.h"
5 8
 #include "linefile.h"
6 9
 #include "hash.h"
7
-#include "localmem.h"
8
-#include "errabort.h"
10
+#include "errAbort.h"
9 11
 #include "sqlNum.h"
10 12
 #include "sig.h"
11 13
 #include "zlibFace.h"
... ...
@@ -15,7 +17,6 @@
15 17
 #include "bwgInternal.h"
16 18
 #include "bigWig.h"
17 19
 
18
-static char const rcsid[] = "$Id: bwgCreate.c,v 1.27 2010/06/10 20:13:29 braney Exp $";
19 20
 
20 21
 static int bwgBedGraphItemCmp(const void *va, const void *vb)
21 22
 /* Compare to sort based on query start. */
... ...
@@ -628,6 +629,78 @@ slFreeList(&uniqList);
628 629
 *retMaxChromNameSize = maxChromNameSize;
629 630
 }
630 631
 
632
+static int bwgStrcmp (const void * A, const void * B) {
633
+	char * stringA = *((char **) A);
634
+	char * stringB = *((char **) B);
635
+	return strcmp(stringA, stringB);
636
+}
637
+
638
+void bwgMakeAllChromInfo(struct bwgSection *sectionList, struct hash *chromSizeHash,
639
+	int *retChromCount, struct bbiChromInfo **retChromArray,
640
+	int *retMaxChromNameSize)
641
+/* Fill in chromId field in sectionList.  Return array of chromosome name/ids. 
642
+ * The chromSizeHash is keyed by name, and has int values. */
643
+{
644
+/* Build up list of unique chromosome names. */
645
+int maxChromNameSize = 0;
646
+
647
+/* Get list of values */
648
+int chromCount = chromSizeHash->elCount;
649
+char ** chromName, ** chromNames;
650
+AllocArray(chromNames, chromCount);
651
+chromName = chromNames;
652
+struct hashEl* el;
653
+struct hashCookie cookie = hashFirst(chromSizeHash);
654
+for (el = hashNext(&cookie); el; el = hashNext(&cookie)) {
655
+	*chromName = el->name;
656
+	if (strlen(el->name) > maxChromNameSize)
657
+		maxChromNameSize = strlen(el->name);
658
+	chromName++;
659
+}
660
+qsort(chromNames, chromCount, sizeof(char *), bwgStrcmp);
661
+
662
+/* Allocate and fill in results array. */
663
+struct bbiChromInfo *chromArray;
664
+AllocArray(chromArray, chromCount);
665
+int i;
666
+for (i = 0; i < chromCount; ++i)
667
+    {
668
+    chromArray[i].name = chromNames[i];
669
+    chromArray[i].id = i;
670
+    chromArray[i].size = hashIntVal(chromSizeHash, chromNames[i]);
671
+    }
672
+
673
+// Assign IDs to sections:
674
+struct bwgSection *section;
675
+char *name = "";
676
+bits32 chromId = 0;
677
+for (section = sectionList; section != NULL; section = section->next)
678
+    {
679
+    if (!sameString(section->chrom, name))
680
+        {
681
+        for (i = 0; i < chromCount; ++i)
682
+            {
683
+	    if (sameString(section->chrom, chromArray[i].name)) 
684
+	        {
685
+		    section->chromId = i;
686
+	    	    break;
687
+	        }
688
+	    }
689
+	if (i == chromCount)
690
+		errAbort("Could not find %s in list of chromosomes\n", section->chrom);
691
+	chromId = section->chromId;
692
+	name = section->chrom;
693
+	}
694
+    else 
695
+	section->chromId = chromId;
696
+    }
697
+
698
+/* Clean up, set return values and go home. */
699
+*retChromCount = chromCount;
700
+*retChromArray = chromArray;
701
+*retMaxChromNameSize = maxChromNameSize;
702
+}
703
+
631 704
 int bwgAverageResolution(struct bwgSection *sectionList)
632 705
 /* Return the average resolution seen in sectionList. */
633 706
 {
... ...
@@ -798,46 +871,18 @@ slReverse(&outList);
798 871
 return outList;
799 872
 }
800 873
 
801
-void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, 
802
-	int blockSize, int itemsPerSlot, boolean doCompress, char *fileName)
803
-/* Create a bigWig file out of a sorted sectionList. */
804
-{
805
-bits64 sectionCount = slCount(sectionList);
806
-FILE *f = mustOpen(fileName, "wb");
807
-bits32 sig = bigWigSig;
808
-bits16 version = bbiCurrentVersion;
809
-bits16 summaryCount = 0;
810
-bits16 reserved16 = 0;
811
-bits32 reserved32 = 0;
812
-bits64 reserved64 = 0;
813
-bits64 dataOffset = 0, dataOffsetPos;
814
-bits64 indexOffset = 0, indexOffsetPos;
815
-bits64 chromTreeOffset = 0, chromTreeOffsetPos;
816
-bits64 totalSummaryOffset = 0, totalSummaryOffsetPos;
817
-bits32 uncompressBufSize = 0;
818
-bits64 uncompressBufSizePos;
819
-struct bbiSummary *reduceSummaries[10];
820
-bits32 reductionAmounts[10];
821
-bits64 reductionDataOffsetPos[10];
822
-bits64 reductionDataOffsets[10];
823
-bits64 reductionIndexOffsets[10];
824
-int i;
825
-
826
-/* Figure out chromosome ID's. */
827
-struct bbiChromInfo *chromInfoArray;
828
-int chromCount, maxChromNameSize;
829
-bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
830
-
874
+static void bwgComputeDynamicSummaries(struct bwgSection *sectionList, struct bbiSummary ** reduceSummaries, bits16 * summaryCount, struct bbiChromInfo *chromInfoArray, int chromCount, bits32 * reductionAmounts, boolean doCompress) {
831 875
 /* Figure out initial summary level - starting with a summary 10 times the amount
832 876
  * of the smallest item.  See if summarized data is smaller than half input data, if
833 877
  * not bump up reduction by a factor of 2 until it is, or until further summarying
834 878
  * yeilds no size reduction. */
879
+int i;
835 880
 int  minRes = bwgAverageResolution(sectionList);
836 881
 int initialReduction = minRes*10;
837 882
 bits64 fullSize = bwgTotalSectionSize(sectionList);
838
-bits64 maxReducedSize = fullSize/2;
839
-struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL;
840 883
 bits64 lastSummarySize = 0, summarySize;
884
+bits64 maxReducedSize = fullSize/2;
885
+struct bbiSummary *summaryList = NULL;
841 886
 for (;;)
842 887
     {
843 888
     summaryList = bwgReduceSectionList(sectionList, chromInfoArray, initialReduction);
... ...
@@ -862,31 +907,92 @@ for (;;)
862 907
     else
863 908
         break;
864 909
     }
865
-summaryCount = 1;
866
-reduceSummaries[0] = firstSummaryList = summaryList;
910
+*summaryCount = 1;
911
+reduceSummaries[0] = summaryList;
867 912
 reductionAmounts[0] = initialReduction;
868 913
 
869 914
 /* Now calculate up to 10 levels of further summary. */
870 915
 bits64 reduction = initialReduction;
871
-for (i=0; i<ArraySize(reduceSummaries)-1; i++)
916
+for (i=0; i<9; i++)
872 917
     {
873 918
     reduction *= 4;
874 919
     if (reduction > 1000000000)
875 920
         break;
876
-    summaryList = bbiReduceSummaryList(reduceSummaries[summaryCount-1], chromInfoArray, 
921
+    summaryList = bbiReduceSummaryList(reduceSummaries[*summaryCount-1], chromInfoArray, 
877 922
     	reduction);
878 923
     summarySize = bbiTotalSummarySize(summaryList);
879 924
     if (summarySize != lastSummarySize)
880 925
         {
881
- 	reduceSummaries[summaryCount] = summaryList;
882
-	reductionAmounts[summaryCount] = reduction;
883
-	++summaryCount;
926
+ 	reduceSummaries[*summaryCount] = summaryList;
927
+	reductionAmounts[*summaryCount] = reduction;
928
+	++(*summaryCount);
884 929
 	}
885 930
     int summaryItemCount = slCount(summaryList);
886 931
     if (summaryItemCount <= chromCount)
887 932
         break;
888 933
     }
889 934
 
935
+}
936
+
937
+static void bwgComputeFixedSummaries(struct bwgSection * sectionList, struct bbiSummary ** reduceSummaries, bits16 * summaryCount, struct bbiChromInfo *chromInfoArray, bits32 * reductionAmounts) {
938
+// Hack: pre-defining summary levels, set off Ensembl default zoom levels
939
+// The last two values of this array were extrapolated following Jim's formula
940
+int i;
941
+#define REDUCTION_COUNT 10
942
+bits32 presetReductions[REDUCTION_COUNT] = {30, 65, 130, 260, 450, 648, 950, 1296, 4800, 19200}; 
943
+
944
+bits64 reduction = reductionAmounts[0] = presetReductions[0];
945
+reduceSummaries[0] = bwgReduceSectionList(sectionList, chromInfoArray, presetReductions[0]);
946
+
947
+for (i=1; i<REDUCTION_COUNT; i++)
948
+    {
949
+    reduction = reductionAmounts[i] = presetReductions[i];
950
+    reduceSummaries[i] = bbiReduceSummaryList(reduceSummaries[i-1], chromInfoArray, 
951
+    	reduction);
952
+    }
953
+
954
+*summaryCount = REDUCTION_COUNT;
955
+}
956
+
957
+void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, 
958
+	int blockSize, int itemsPerSlot, boolean doCompress, boolean keepAllChromosomes,
959
+        boolean fixedSummaries, char *fileName)
960
+/* Create a bigWig file out of a sorted sectionList. */
961
+{
962
+bits64 sectionCount = slCount(sectionList);
963
+FILE *f = mustOpen(fileName, "wb");
964
+bits32 sig = bigWigSig;
965
+bits16 version = bbiCurrentVersion;
966
+bits16 summaryCount = 0;
967
+bits16 reserved16 = 0;
968
+bits32 reserved32 = 0;
969
+bits64 reserved64 = 0;
970
+bits64 dataOffset = 0, dataOffsetPos;
971
+bits64 indexOffset = 0, indexOffsetPos;
972
+bits64 chromTreeOffset = 0, chromTreeOffsetPos;
973
+bits64 totalSummaryOffset = 0, totalSummaryOffsetPos;
974
+bits32 uncompressBufSize = 0;
975
+bits64 uncompressBufSizePos;
976
+struct bbiSummary *reduceSummaries[10];
977
+bits32 reductionAmounts[10];
978
+bits64 reductionDataOffsetPos[10];
979
+bits64 reductionDataOffsets[10];
980
+bits64 reductionIndexOffsets[10];
981
+int i;
982
+
983
+/* Figure out chromosome ID's. */
984
+struct bbiChromInfo *chromInfoArray;
985
+int chromCount, maxChromNameSize;
986
+if (keepAllChromosomes)
987
+    bwgMakeAllChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
988
+else
989
+    bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
990
+
991
+if (fixedSummaries) 
992
+    bwgComputeFixedSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, reductionAmounts);
993
+else 
994
+    bwgComputeDynamicSummaries(sectionList, reduceSummaries, &summaryCount, chromInfoArray, chromCount, reductionAmounts, doCompress);
995
+
890 996
 /* Write fixed header. */
891 997
 writeOne(f, sig);
892 998
 writeOne(f, version);
... ...
@@ -904,8 +1010,8 @@ totalSummaryOffsetPos = ftell(f);
904 1010
 writeOne(f, totalSummaryOffset);
905 1011
 uncompressBufSizePos = ftell(f);
906 1012
 writeOne(f, uncompressBufSize);
907
-for (i=0; i<2; ++i)
908
-    writeOne(f, reserved32);
1013
+writeOne(f, reserved64);  /* nameIndexOffset */
1014
+assert(ftell(f) == 64);
909 1015
 
910 1016
 /* Write summary headers */
911 1017
 for (i=0; i<summaryCount; ++i)
... ...
@@ -964,7 +1070,7 @@ for (i=0; i<summaryCount; ++i)
964 1070
     }
965 1071
 
966 1072
 /* Calculate summary */
967
-struct bbiSummary *sum = firstSummaryList;
1073
+struct bbiSummary *sum = reduceSummaries[0];
968 1074
 if (sum != NULL)
969 1075
     {
970 1076
     totalSum.validCount = sum->validCount;
... ...
@@ -1087,13 +1193,15 @@ for (section = sectionList; section != NULL; section = nextSection)
1087 1193
 return sectionList;
1088 1194
 }
1089 1195
 
1090
-void bigWigFileCreate(
1196
+void bigWigFileCreateEx(
1091 1197
 	char *inName, 		/* Input file in ascii wiggle format. */
1092 1198
 	char *chromSizes, 	/* Two column tab-separated file: <chromosome> <size>. */
1093 1199
 	int blockSize,		/* Number of items to bundle in r-tree.  1024 is good. */
1094 1200
 	int itemsPerSlot,	/* Number of items in lowest level of tree.  512 is good. */
1095 1201
 	boolean clipDontDie,	/* If TRUE then clip items off end of chrom rather than dying. */
1096 1202
 	boolean compress,	/* If TRUE then compress data. */
1203
+	boolean keepAllChromosomes,	/* If TRUE then store all chromosomes in chromosomal b-tree. */
1204
+	boolean fixedSummaries,	/* If TRUE then impose fixed summary levels. */
1097 1205
 	char *outName)
1098 1206
 /* Convert ascii format wig file (in fixedStep, variableStep or bedGraph format) 
1099 1207
  * to binary big wig format. */
... ...
@@ -1109,7 +1217,22 @@ struct lm *lm = lmInit(0);
1109 1217
 struct bwgSection *sectionList = bwgParseWig(inName, clipDontDie, chromSizeHash, itemsPerSlot, lm);
1110 1218
 if (sectionList == NULL)
1111 1219
     errAbort("%s is empty of data", inName);
1112
-bwgCreate(sectionList, chromSizeHash, blockSize, itemsPerSlot, compress, outName);
1220
+bwgCreate(sectionList, chromSizeHash, blockSize, itemsPerSlot, compress, keepAllChromosomes, fixedSummaries, outName);
1113 1221
 lmCleanup(&lm);
1114 1222
 }
1115 1223
 
1224
+void bigWigFileCreate(
1225
+	char *inName, 		/* Input file in ascii wiggle format. */
1226
+	char *chromSizes, 	/* Two column tab-separated file: <chromosome> <size>. */
1227
+	int blockSize,		/* Number of items to bundle in r-tree.  1024 is good. */
1228
+	int itemsPerSlot,	/* Number of items in lowest level of tree.  512 is good. */
1229
+	boolean clipDontDie,	/* If TRUE then clip items off end of chrom rather than dying. */
1230
+	boolean compress,	/* If TRUE then compress data. */
1231
+	char *outName)
1232
+/* Convert ascii format wig file (in fixedStep, variableStep or bedGraph format) 
1233
+ * to binary big wig format. */
1234
+{
1235
+bigWigFileCreateEx( inName, chromSizes, blockSize, itemsPerSlot, clipDontDie,
1236
+	compress, FALSE, FALSE, outName);
1237
+}
1238
+
Browse code

Updates to UCSC library

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/rtracklayer@57674 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 26/08/2011 13:57:00
Showing1 changed files
... ...
@@ -15,70 +15,7 @@
15 15
 #include "bwgInternal.h"
16 16
 #include "bigWig.h"
17 17
 
18
-static char const rcsid[] = "$Id: bwgCreate.c,v 1.22 2009/11/25 07:17:25 kent Exp $";
19
-
20
-struct bwgBedGraphItem
21
-/* An bedGraph-type item in a bwgSection. */
22
-    {
23
-    struct bwgBedGraphItem *next;	/* Next in list. */
24
-    bits32 start,end;		/* Range of chromosome covered. */
25
-    float val;			/* Value. */
26
-    };
27
-
28
-struct bwgVariableStepItem
29
-/* An variableStep type item in a bwgSection. */
30
-    {
31
-    struct bwgVariableStepItem *next;	/* Next in list. */
32
-    bits32 start;		/* Start position in chromosome. */
33
-    float val;			/* Value. */
34
-    };
35
-
36
-struct bwgVariableStepPacked
37
-/* An variableStep type item in a bwgSection. */
38
-    {
39
-    bits32 start;		/* Start position in chromosome. */
40
-    float val;			/* Value. */
41
-    };
42
-
43
-struct bwgFixedStepItem
44
-/* An fixedStep type item in a bwgSection. */
45
-    {
46
-    struct bwgFixedStepItem *next;	/* Next in list. */
47
-    float val;			/* Value. */
48
-    };
49
-
50
-struct bwgFixedStepPacked
51
-/* An fixedStep type item in a bwgSection. */
52
-    {
53
-    float val;			/* Value. */
54
-    };
55
-
56
-union bwgItem
57
-/* Union of item pointers for all possible section types. */
58
-    {
59
-    struct bwgBedGraphItem *bedGraphList;		/* A linked list */
60
-    struct bwgFixedStepPacked *fixedStepPacked;		/* An array */
61
-    struct bwgVariableStepPacked *variableStepPacked;	/* An array */
62
-    /* No packed format for bedGraph... */
63
-    };
64
-
65
-struct bwgSection
66
-/* A section of a bigWig file - all on same chrom.  This is a somewhat fat data
67
- * structure used by the bigWig creation code.  See also bwgSection for the
68
- * structure returned by the bigWig reading code. */
69
-    {
70
-    struct bwgSection *next;		/* Next in list. */
71
-    char *chrom;			/* Chromosome name. */
72
-    bits32 start,end;			/* Range of chromosome covered. */
73
-    enum bwgSectionType type;
74
-    union bwgItem items;		/* List/array of items in this section. */
75
-    bits32 itemStep;			/* Step within item if applicable. */
76
-    bits32 itemSpan;			/* Item span if applicable. */
77
-    bits16 itemCount;			/* Number of items in section. */
78
-    bits32 chromId;			/* Unique small integer value for chromosome. */
79
-    bits64 fileOffset;			/* Offset of section in file. */
80
-    };
81
-
18
+static char const rcsid[] = "$Id: bwgCreate.c,v 1.27 2010/06/10 20:13:29 braney Exp $";
82 19
 
83 20
 static int bwgBedGraphItemCmp(const void *va, const void *vb)
84 21
 /* Compare to sort based on query start. */
... ...
@@ -200,7 +137,7 @@ return bufSize;
200 137
 }
201 138
 
202 139
 
203
-static int bwgSectionCmp(const void *va, const void *vb)
140
+int bwgSectionCmp(const void *va, const void *vb)
204 141
 /* Compare to sort based on chrom,start,end.  */
205 142
 {
206 143
 const struct bwgSection *a = *((struct bwgSection **)va);
... ...
@@ -338,7 +275,7 @@ struct lm *lmLocal = lmInit(0);
338 275
  * adding values from single column to list. */
339 276
 char *words[2];
340 277
 char *line;
341
-struct bwgVariableStepItem *item, *itemList = NULL;
278
+struct bwgVariableStepItem *item, *nextItem, *itemList = NULL;
342 279
 int originalSectionSize = 0;
343 280
 while (lineFileNextReal(lf, &line))
344 281
     {
... ...
@@ -372,6 +309,20 @@ while (lineFileNextReal(lf, &line))
372 309
     }
373 310
 slSort(&itemList, bwgVariableStepItemCmp);
374 311
 
312
+/* Make sure no overlap between items. */
313
+if (itemList != NULL)
314
+    {
315
+    item = itemList;
316
+    for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next)
317
+        {
318
+	if (item->start + span > nextItem->start)
319
+	    errAbort("Overlap on %s between items starting at %d and %d.\n"
320
+	             "Please remove overlaps and try again",
321
+		    chrom, item->start, nextItem->start);
322
+	item = nextItem;
323
+	}
324
+    }
325
+
375 326
 /* Break up into sections of no more than items-per-slot size. */
376 327
 int sizeLeft = originalSectionSize;
377 328
 for (item = itemList; item != NULL; )
... ...
@@ -435,7 +386,7 @@ else
435 386
     errAbort("Unknown type %s\n", typeWord);
436 387
 
437 388
 /* Set up defaults for values we hope to parse out of rest of line. */
438
-int span = 1;
389
+int span = 0;
439 390
 bits32 step = 0;
440 391
 bits32 start = 0;
441 392
 char *chrom = NULL;
... ...
@@ -468,8 +419,8 @@ while ((varEqVal = nextWord(&initialLine)) != NULL)
468 419
  * rest of section. */
469 420
 if (chrom == NULL)
470 421
     errAbort("Missing chrom= setting line %d of %s\n", lf->lineIx, lf->fileName);
471
-bits32 chromSize = hashIntVal(chromSizeHash, chrom);
472
-if (start >= chromSize)
422
+bits32 chromSize = (chromSizeHash ? hashIntVal(chromSizeHash, chrom) : BIGNUM);
423
+if (start > chromSize)
473 424
     {
474 425
     warn("line %d of %s: chromosome %s has %u bases, but item starts at %u",
475 426
     	lf->lineIx, lf->fileName, chrom, chromSize, start);
... ...
@@ -482,6 +433,8 @@ if (type == bwgTypeFixedStep)
482 433
 	errAbort("Missing start= setting line %d of %s\n", lf->lineIx, lf->fileName);
483 434
     if (step == 0)
484 435
 	errAbort("Missing step= setting line %d of %s\n", lf->lineIx, lf->fileName);
436
+    if (span == 0)
437
+	span = step;
485 438
     parseFixedStepSection(lf, clipDontDie, lm, itemsPerSlot, 
486 439
     	chrom, chromSize, span, start-1, step, pSectionList);
487 440
     }
... ...
@@ -491,6 +444,8 @@ else
491 444
 	errAbort("Extra start= setting line %d of %s\n", lf->lineIx, lf->fileName);
492 445
     if (step != 0)
493 446
 	errAbort("Extra step= setting line %d of %s\n", lf->lineIx, lf->fileName);
447
+    if (span == 0)
448
+	span = 1;
494 449
     parseVariableStepSection(lf, clipDontDie, lm, itemsPerSlot, 
495 450
     	chrom, chromSize, span, pSectionList);
496 451
     }
... ...
@@ -546,7 +501,7 @@ while (lineFileNextReal(lf, &line))
546 501
         {
547 502
 	lmAllocVar(chromHash->lm, chrom);
548 503
 	hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
549
-	chrom->size = hashIntVal(chromSizeHash, chromName);
504
+	chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM);
550 505
 	slAddHead(&chromList, chrom);
551 506
 	}
552 507
 
... ...
@@ -574,10 +529,22 @@ while (lineFileNextReal(lf, &line))
574 529
     }
575 530
 slSort(&chromList, bedGraphChromCmpName);
576 531
 
532
+/* Loop through each chromosome and output the item list, broken into sections
533
+ * for that chrom. */
577 534
 for (chrom = chromList; chrom != NULL; chrom = chrom->next)
578 535
     {
579 536
     slSort(&chrom->itemList, bwgBedGraphItemCmp);
580 537
 
538
+    /* Check to make sure no overlap between items. */
539
+    struct bwgBedGraphItem *item = chrom->itemList, *nextItem;
540
+    for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next)
541
+        {
542
+	if (item->end > nextItem->start)
543
+	    errAbort("Overlap between %s %d %d and %s %d %d.\nPlease remove overlaps and try again",
544
+	        chrom->name, item->start, item->end, chrom->name, nextItem->start, nextItem->end);
545
+	item = nextItem;
546
+	}
547
+
581 548
     /* Break up into sections of no more than items-per-slot size. */
582 549
     struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
583 550
     for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
... ...
@@ -832,8 +799,7 @@ return outList;
832 799
 }
833 800
 
834 801
 void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, 
835
-               int blockSize, int itemsPerSlot, boolean doCompress,
836
-               char *fileName)
802
+	int blockSize, int itemsPerSlot, boolean doCompress, char *fileName)
837 803
 /* Create a bigWig file out of a sorted sectionList. */
838 804
 {
839 805
 bits64 sectionCount = slCount(sectionList);
... ...
@@ -862,12 +828,12 @@ struct bbiChromInfo *chromInfoArray;
862 828
 int chromCount, maxChromNameSize;
863 829
 bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize);
864 830
 
865
-/* Figure out initial summary level - starting with a summary 20 times the amount
831
+/* Figure out initial summary level - starting with a summary 10 times the amount
866 832
  * of the smallest item.  See if summarized data is smaller than half input data, if
867 833
  * not bump up reduction by a factor of 2 until it is, or until further summarying
868 834
  * yeilds no size reduction. */
869 835
 int  minRes = bwgAverageResolution(sectionList);
870
-int initialReduction = minRes*20;
836
+int initialReduction = minRes*10;
871 837
 bits64 fullSize = bwgTotalSectionSize(sectionList);
872 838
 bits64 maxReducedSize = fullSize/2;
873 839
 struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL;
... ...
@@ -878,8 +844,7 @@ for (;;)
878 844
     bits64 summarySize = bbiTotalSummarySize(summaryList);
879 845
     if (doCompress)
880 846
 	{
881
-        summarySize *= 4;	// Compensate for summary not compressing as well as primary data
882
-	initialReduction *= 4;
847
+        summarySize *= 2;	// Compensate for summary not compressing as well as primary data
883 848
 	}
884 849
     if (summarySize >= maxReducedSize && summarySize != lastSummarySize)
885 850
         {
... ...
@@ -1049,18 +1014,30 @@ for (i=0; i<summaryCount; ++i)
1049 1014
     writeOne(f, reductionIndexOffsets[i]);
1050 1015
     }
1051 1016
 
1017
+/* Write end signature. */
1018
+fseek(f, 0L, SEEK_END);
1019
+writeOne(f, sig);
1020
+
1052 1021
 /* Clean up */
1053 1022
 freez(&chromInfoArray);
1054 1023
 carefulClose(&f);
1055 1024
 }
1056 1025
 
1057
-struct bwgSection *bwgParseWig(char *fileName, boolean clipDontDie, struct hash *chromSizeHash,
1058
-	int maxSectionSize, struct lm *lm)
1026
+struct bwgSection *bwgParseWig(
1027
+	char *fileName,       /* Name of ascii wig file. */
1028
+	boolean clipDontDie,  /* Skip items outside chromosome rather than aborting. */
1029
+	struct hash *chromSizeHash,  /* If non-NULL items checked to be inside chromosome. */
1030
+	int maxSectionSize,   /* Biggest size of a section.  100 - 100,000 is usual range. */
1031
+	struct lm *lm)	      /* Memory pool to allocate from. */
1059 1032
 /* Parse out ascii wig file - allocating memory in lm. */
1060 1033
 {
1061 1034
 struct lineFile *lf = lineFileOpen(fileName, TRUE);
1062 1035
 char *line;
1063 1036
 struct bwgSection *sectionList = NULL;
1037
+
1038
+/* remove initial browser and track lines */
1039
+lineFileRemoveInitialCustomTrackLines(lf);
1040
+
1064 1041
 while (lineFileNextReal(lf, &line))
1065 1042
     {
1066 1043
     verbose(2, "processing %s\n", line);
... ...
@@ -1089,7 +1066,7 @@ while (lineFileNextReal(lf, &line))
1089 1066
     }
1090 1067
 slSort(&sectionList, bwgSectionCmp);
1091 1068
 
1092
-/* Check for overlap. */
1069
+/* Check for overlap at section level. */
1093 1070
 struct bwgSection *section, *nextSection;
1094 1071
 for (section = sectionList; section != NULL; section = nextSection)
1095 1072
     {
Browse code

Add experimental support for bigWig tracks. Import seems to work. Export not so much. Note that rtracklayer now contains a large amount of compiled code -- mostly from the UCSC library.

git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/rtracklayer@44644 bc3139a8-67e5-0310-9ffc-ced21a209358

Michael Lawrence authored on 10/02/2010 23:56:11
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,1138 @@
1
+/* bwgCreate - create big wig files.  Implements write side of bwgInternal.h module. 
2
+ * See the comment in bwgInternal.h for a description of the file format. */
3
+
4
+#include "common.h"
5
+#include "linefile.h"
6
+#include "hash.h"
7
+#include "localmem.h"
8
+#include "errabort.h"
9
+#include "sqlNum.h"
10
+#include "sig.h"
11
+#include "zlibFace.h"
12
+#include "bPlusTree.h"
13
+#include "cirTree.h"
14
+#include "bbiFile.h"
15
+#include "bwgInternal.h"
16
+#include "bigWig.h"
17
+
18
+static char const rcsid[] = "$Id: bwgCreate.c,v 1.22 2009/11/25 07:17:25 kent Exp $";
19
+
20
+struct bwgBedGraphItem
21
+/* An bedGraph-type item in a bwgSection. */
22
+    {
23
+    struct bwgBedGraphItem *next;	/* Next in list. */
24
+    bits32 start,end;		/* Range of chromosome covered. */
25
+    float val;			/* Value. */
26
+    };
27
+
28
+struct bwgVariableStepItem
29
+/* An variableStep type item in a bwgSection. */
30
+    {
31
+    struct bwgVariableStepItem *next;	/* Next in list. */
32
+    bits32 start;		/* Start position in chromosome. */
33
+    float val;			/* Value. */
34
+    };
35
+
36
+struct bwgVariableStepPacked
37
+/* An variableStep type item in a bwgSection. */
38
+    {
39
+    bits32 start;		/* Start position in chromosome. */
40
+    float val;			/* Value. */
41
+    };
42
+
43
+struct bwgFixedStepItem
44
+/* An fixedStep type item in a bwgSection. */
45
+    {
46
+    struct bwgFixedStepItem *next;	/* Next in list. */
47
+    float val;			/* Value. */
48
+    };
49
+
50
+struct bwgFixedStepPacked
51
+/* An fixedStep type item in a bwgSection. */
52
+    {
53
+    float val;			/* Value. */
54
+    };
55
+
56
+union bwgItem
57
+/* Union of item pointers for all possible section types. */
58
+    {
59
+    struct bwgBedGraphItem *bedGraphList;		/* A linked list */
60
+    struct bwgFixedStepPacked *fixedStepPacked;		/* An array */
61
+    struct bwgVariableStepPacked *variableStepPacked;	/* An array */
62
+    /* No packed format for bedGraph... */
63
+    };
64
+
65
+struct bwgSection
66
+/* A section of a bigWig file - all on same chrom.  This is a somewhat fat data
67
+ * structure used by the bigWig creation code.  See also bwgSection for the
68
+ * structure returned by the bigWig reading code. */
69
+    {
70
+    struct bwgSection *next;		/* Next in list. */
71
+    char *chrom;			/* Chromosome name. */
72
+    bits32 start,end;			/* Range of chromosome covered. */
73
+    enum bwgSectionType type;
74
+    union bwgItem items;		/* List/array of items in this section. */
75
+    bits32 itemStep;			/* Step within item if applicable. */
76
+    bits32 itemSpan;			/* Item span if applicable. */
77
+    bits16 itemCount;			/* Number of items in section. */
78
+    bits32 chromId;			/* Unique small integer value for chromosome. */
79
+    bits64 fileOffset;			/* Offset of section in file. */
80
+    };
81
+
82
+
83
+static int bwgBedGraphItemCmp(const void *va, const void *vb)
84
+/* Compare to sort based on query start. */
85
+{
86
+const struct bwgBedGraphItem *a = *((struct bwgBedGraphItem **)va);
87
+const struct bwgBedGraphItem *b = *((struct bwgBedGraphItem **)vb);
88
+int dif = (int)a->start - (int)b->start;
89
+if (dif == 0)
90
+    dif = (int)a->end - (int)b->end;
91
+return dif;
92
+}
93
+
94
+static int bwgVariableStepItemCmp(const void *va, const void *vb)
95
+/* Compare to sort based on query start. */
96
+{
97
+const struct bwgVariableStepItem *a = *((struct bwgVariableStepItem **)va);
98
+const struct bwgVariableStepItem *b = *((struct bwgVariableStepItem **)vb);
99
+return (int)a->start - (int)b->start;
100
+}
101
+
102
+void bwgDumpSummary(struct bbiSummary *sum, FILE *f)
103
+/* Write out summary info to file. */
104
+{
105
+fprintf(f, "summary %d:%d-%d min=%f, max=%f, sum=%f, sumSquares=%f, validCount=%d, mean=%f\n",
106
+     sum->chromId, sum->start, sum->end, sum->minVal, sum->maxVal, sum->sumData,
107
+     sum->sumSquares, sum->validCount, sum->sumData/sum->validCount);
108
+}
109
+
110
+static int bwgSectionWrite(struct bwgSection *section, boolean doCompress, FILE *f)
111
+/* Write out section to file, filling in section->fileOffset. */
112
+{
113
+UBYTE type = section->type;
114
+UBYTE reserved8 = 0;
115
+int itemSize;
116
+switch (section->type)
117
+    {
118
+    case bwgTypeBedGraph:
119
+        itemSize = 12;
120
+	break;
121
+    case bwgTypeVariableStep:
122
+        itemSize = 8;
123
+	break;
124
+    case bwgTypeFixedStep:
125
+        itemSize = 4;
126
+	break;
127
+    default:
128
+        itemSize = 0;  // Suppress compiler warning
129
+	internalErr();
130
+	break;
131
+    }
132
+int fixedSize = sizeof(section->chromId) + sizeof(section->start) + sizeof(section->end) + 
133
+     sizeof(section->itemStep) + sizeof(section->itemSpan) + sizeof(type) + sizeof(reserved8) +
134
+     sizeof(section->itemCount);
135
+int bufSize = section->itemCount * itemSize + fixedSize;
136
+char buf[bufSize];
137
+char *bufPt = buf;
138
+
139
+section->fileOffset = ftell(f);
140
+memWriteOne(&bufPt, section->chromId);
141
+memWriteOne(&bufPt, section->start);
142
+memWriteOne(&bufPt, section->end);
143
+memWriteOne(&bufPt, section->itemStep);
144
+memWriteOne(&bufPt, section->itemSpan);
145
+memWriteOne(&bufPt, type);
146
+memWriteOne(&bufPt, reserved8);
147
+memWriteOne(&bufPt, section->itemCount);
148
+
149
+int i;
150
+switch (section->type)
151
+    {
152
+    case bwgTypeBedGraph:
153
+	{
154
+	struct bwgBedGraphItem *item = section->items.bedGraphList;
155
+	for (item = section->items.bedGraphList; item != NULL; item = item->next)
156
+	    {
157
+	    memWriteOne(&bufPt, item->start);
158
+	    memWriteOne(&bufPt, item->end);
159
+	    memWriteOne(&bufPt, item->val);
160
+	    }
161
+	break;
162
+	}
163
+    case bwgTypeVariableStep:
164
+	{
165
+	struct bwgVariableStepPacked *items = section->items.variableStepPacked;
166
+	for (i=0; i<section->itemCount; ++i)
167
+	    {
168
+	    memWriteOne(&bufPt, items->start);
169
+	    memWriteOne(&bufPt, items->val);
170
+	    items += 1;
171
+	    }
172
+	break;
173
+	}
174
+    case bwgTypeFixedStep:
175
+	{
176
+	struct bwgFixedStepPacked *items = section->items.fixedStepPacked;
177
+	for (i=0; i<section->itemCount; ++i)
178
+	    {
179
+	    memWriteOne(&bufPt, items->val);
180
+	    items += 1;
181
+	    }
182
+	break;
183
+	}
184
+    default:
185
+        internalErr();
186
+	break;
187
+    }
188
+assert(bufSize == (bufPt - buf) );
189
+
190
+if (doCompress)
191
+    {
192
+    size_t maxCompSize = zCompBufSize(bufSize);
193
+    char compBuf[maxCompSize];
194
+    int compSize = zCompress(buf, bufSize, compBuf, maxCompSize);
195
+    mustWrite(f, compBuf, compSize);
196
+    }
197
+else
198
+    mustWrite(f, buf, bufSize);
199
+return bufSize;
200
+}
201
+
202
+
203
+static int bwgSectionCmp(const void *va, const void *vb)
204
+/* Compare to sort based on chrom,start,end.  */
205
+{
206
+const struct bwgSection *a = *((struct bwgSection **)va);
207
+const struct bwgSection *b = *((struct bwgSection **)vb);
208
+int dif = strcmp(a->chrom, b->chrom);
209
+if (dif == 0)
210
+    {
211
+    dif = (int)a->start - (int)b->start;
212
+    if (dif == 0)
213
+	dif = (int)a->end - (int)b->end;
214
+    }
215
+return dif;
216
+}
217
+
218
+static struct cirTreeRange bwgSectionFetchKey(const void *va, void *context)
219
+/* Fetch bwgSection key for r-tree */
220
+{
221
+struct cirTreeRange res;
222
+const struct bwgSection *a = *((struct bwgSection **)va);
223
+res.chromIx = a->chromId;
224
+res.start = a->start;
225
+res.end = a->end;
226
+return res;
227
+}
228
+
229
+static bits64 bwgSectionFetchOffset(const void *va, void *context)
230
+/* Fetch bwgSection file offset for r-tree */
231
+{
232
+const struct bwgSection *a = *((struct bwgSection **)va);
233
+return a->fileOffset;
234
+}
235
+
236
+static boolean stepTypeLine(char *line)
237
+/* Return TRUE if it's a variableStep or fixedStep line. */
238
+{
239
+return (startsWithWord("variableStep", line) || startsWithWord("fixedStep", line));
240
+}
241
+
242
+static boolean steppedSectionEnd(char *line, int maxWords)
243
+/* Return TRUE if line indicates the start of another section. */
244
+{
245
+int wordCount = chopByWhite(line, NULL, 5);
246
+if (wordCount > maxWords)
247
+    return TRUE;
248
+return stepTypeLine(line);
249
+}
250
+
251
+static void parseFixedStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
252
+	int itemsPerSlot, char *chrom, bits32 chromSize, bits32 span, bits32 sectionStart, 
253
+	bits32 step, struct bwgSection **pSectionList)
254
+/* Read the single column data in section until get to end. */
255
+{
256
+struct lm *lmLocal = lmInit(0);
257
+
258
+/* Stream through section until get to end of file or next section,
259
+ * adding values from single column to list. */
260
+char *words[1];
261
+char *line;
262
+struct bwgFixedStepItem *item, *itemList = NULL;
263
+int originalSectionSize = 0;
264
+bits32 sectionEnd = sectionStart;
265
+while (lineFileNextReal(lf, &line))
266
+    {
267
+    if (steppedSectionEnd(line, 1))
268
+	{
269
+        lineFileReuse(lf);
270
+	break;
271
+	}
272
+    chopLine(line, words);
273
+    lmAllocVar(lmLocal, item);
274
+    item->val = lineFileNeedDouble(lf, words, 0);
275
+    if (sectionEnd + span > chromSize)
276
+	{
277
+	warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
278
+	    lf->lineIx, lf->fileName, chrom, chromSize, sectionEnd + span);
279
+	if (!clipDontDie)
280
+	    noWarnAbort();
281
+	}
282
+    else
283
+	{
284
+	slAddHead(&itemList, item);
285
+	++originalSectionSize;
286
+	}
287
+    sectionEnd += step;
288
+    }
289
+slReverse(&itemList);
290
+
291
+/* Break up into sections of no more than items-per-slot size, and convert to packed format. */
292
+int sizeLeft = originalSectionSize;
293
+for (item = itemList; item != NULL; )
294
+    {
295
+    /* Figure out size of this section  */
296
+    int sectionSize = sizeLeft;
297
+    if (sectionSize > itemsPerSlot)
298
+        sectionSize = itemsPerSlot;
299
+    sizeLeft -= sectionSize;
300
+
301
+
302
+    /* Allocate and fill in section. */
303
+    struct bwgSection *section;
304
+    lmAllocVar(lm, section);
305
+    section->chrom = chrom;
306
+    section->start = sectionStart;
307
+    sectionStart += sectionSize * step;
308
+    section->end = sectionStart - step + span;
309
+    section->type = bwgTypeFixedStep;
310
+    section->itemStep = step;
311
+    section->itemSpan = span;
312
+    section->itemCount = sectionSize;
313
+
314
+    /* Allocate array for data, and copy from list to array representation */
315
+    struct bwgFixedStepPacked *packed;		/* An array */
316
+    section->items.fixedStepPacked = lmAllocArray(lm, packed, sectionSize);
317
+    int i;
318
+    for (i=0; i<sectionSize; ++i)
319
+        {
320
+	packed->val = item->val;
321
+	item = item->next;
322
+	++packed;
323
+	}
324
+
325
+    /* Add section to list. */
326
+    slAddHead(pSectionList, section);
327
+    }
328
+lmCleanup(&lmLocal);
329
+}
330
+
331
+static void parseVariableStepSection(struct lineFile *lf, boolean clipDontDie, struct lm *lm,
332
+	int itemsPerSlot, char *chrom, int chromSize, bits32 span, struct bwgSection **pSectionList)
333
+/* Read the single column data in section until get to end. */
334
+{
335
+struct lm *lmLocal = lmInit(0);
336
+
337
+/* Stream through section until get to end of file or next section,
338
+ * adding values from single column to list. */
339
+char *words[2];
340
+char *line;
341
+struct bwgVariableStepItem *item, *itemList = NULL;
342
+int originalSectionSize = 0;
343
+while (lineFileNextReal(lf, &line))
344
+    {
345
+    if (steppedSectionEnd(line, 2))
346
+	{
347
+        lineFileReuse(lf);
348
+	break;
349
+	}
350
+    chopLine(line, words);
351
+    lmAllocVar(lmLocal, item);
352
+    int start = lineFileNeedNum(lf, words, 0);
353
+    if (start <= 0)
354
+	{
355
+	errAbort("line %d of %s: zero or negative chromosome coordinate not allowed",
356
+	    lf->lineIx, lf->fileName);
357
+	}
358
+    item->start = start - 1;
359
+    item->val = lineFileNeedDouble(lf, words, 1);
360
+    if (item->start + span > chromSize)
361
+        {
362
+	warn("line %d of %s: chromosome %s has %u bases, but item ends at %u",
363
+	    lf->lineIx, lf->fileName, chrom, chromSize, item->start + span);
364
+	if (!clipDontDie)
365
+	    noWarnAbort();
366
+	}
367
+    else
368
+        {
369
+	slAddHead(&itemList, item);
370
+	++originalSectionSize;
371
+	}
372
+    }
373
+slSort(&itemList, bwgVariableStepItemCmp);
374
+
375
+/* Break up into sections of no more than items-per-slot size. */
376
+int sizeLeft = originalSectionSize;
377
+for (item = itemList; item != NULL; )
378
+    {
379
+    /* Figure out size of this section  */
380
+    int sectionSize = sizeLeft;
381
+    if (sectionSize > itemsPerSlot)
382
+        sectionSize = itemsPerSlot;
383
+    sizeLeft -= sectionSize;
384
+
385
+    /* Convert from list to array representation. */
386
+    struct bwgVariableStepPacked *packed, *p;		
387
+    p = lmAllocArray(lm, packed, sectionSize);
388
+    int i;
389
+    for (i=0; i<sectionSize; ++i)
390
+        {
391
+	p->start = item->start;
392
+	p->val = item->val;
393
+	item = item->next;
394
+	++p;
395
+	}
396
+
397
+    /* Fill in section and add it to list. */
398
+    struct bwgSection *section;
399
+    lmAllocVar(lm, section);
400
+    section->chrom = chrom;
401
+    section->start = packed[0].start;
402
+    section->end = packed[sectionSize-1].start + span;
403
+    section->type = bwgTypeVariableStep;
404
+    section->items.variableStepPacked = packed;
405
+    section->itemSpan = span;
406
+    section->itemCount = sectionSize;
407
+    slAddHead(pSectionList, section);
408
+    }
409
+lmCleanup(&lmLocal);
410
+}
411
+
412
+static unsigned parseUnsignedVal(struct lineFile *lf, char *var, char *val)
413
+/* Return val as an integer, printing error message if it's not. */
414
+{
415
+char c = val[0];
416
+if (!isdigit(c))
417
+    errAbort("Expecting numerical value for %s, got %s, line %d of %s", 
418
+    	var, val, lf->lineIx, lf->fileName);
419
+return sqlUnsigned(val);
420
+}
421
+
422
+static void parseSteppedSection(struct lineFile *lf, boolean clipDontDie, 
423
+	struct hash *chromSizeHash, char *initialLine, 
424
+	struct lm *lm, int itemsPerSlot, struct bwgSection **pSectionList)
425
+/* Parse out a variableStep or fixedStep section and add it to list, breaking it up as need be. */
426
+{
427
+/* Parse out first word of initial line and make sure it is something we recognize. */
428
+char *typeWord = nextWord(&initialLine);
429
+enum bwgSectionType type = bwgTypeFixedStep;
430
+if (sameString(typeWord, "variableStep"))
431
+    type = bwgTypeVariableStep;
432
+else if (sameString(typeWord, "fixedStep"))
433
+    type = bwgTypeFixedStep;
434
+else
435
+    errAbort("Unknown type %s\n", typeWord);
436
+
437
+/* Set up defaults for values we hope to parse out of rest of line. */
438
+int span = 1;
439
+bits32 step = 0;
440
+bits32 start = 0;
441
+char *chrom = NULL;
442
+
443
+/* Parse out var=val pairs. */
444
+char *varEqVal;
445
+while ((varEqVal = nextWord(&initialLine)) != NULL)
446
+    {
447
+    char *wordPairs[2];
448
+    int wc = chopByChar(varEqVal, '=', wordPairs, 2);
449
+    if (wc != 2)
450
+        errAbort("strange var=val pair line %d of %s", lf->lineIx, lf->fileName);
451
+    char *var = wordPairs[0];
452
+    char *val = wordPairs[1];
453
+    if (sameString(var, "chrom"))
454
+        chrom = cloneString(val);
455
+    else if (sameString(var, "span"))
456
+	span = parseUnsignedVal(lf, var, val);
457
+    else if (sameString(var, "step"))
458
+	step = parseUnsignedVal(lf, var, val);
459
+    else if (sameString(var, "start"))
460
+	{
461
+        start = parseUnsignedVal(lf, var, val);
462
+	}
463
+    else
464
+	errAbort("Unknown setting %s=%s line %d of %s", var, val, lf->lineIx, lf->fileName);
465
+    }
466
+
467
+/* Check that we have all that are required and no more, and call type-specific routine to parse
468
+ * rest of section. */
469
+if (chrom == NULL)
470
+    errAbort("Missing chrom= setting line %d of %s\n", lf->lineIx, lf->fileName);
471
+bits32 chromSize = hashIntVal(chromSizeHash, chrom);
472
+if (start >= chromSize)
473
+    {
474
+    warn("line %d of %s: chromosome %s has %u bases, but item starts at %u",
475
+    	lf->lineIx, lf->fileName, chrom, chromSize, start);
476
+    if (!clipDontDie)
477
+        noWarnAbort();
478
+    }
479
+if (type == bwgTypeFixedStep)
480
+    {
481
+    if (start == 0)
482
+	errAbort("Missing start= setting line %d of %s\n", lf->lineIx, lf->fileName);
483
+    if (step == 0)
484
+	errAbort("Missing step= setting line %d of %s\n", lf->lineIx, lf->fileName);
485
+    parseFixedStepSection(lf, clipDontDie, lm, itemsPerSlot, 
486
+    	chrom, chromSize, span, start-1, step, pSectionList);
487
+    }
488
+else
489
+    {
490
+    if (start != 0)
491
+	errAbort("Extra start= setting line %d of %s\n", lf->lineIx, lf->fileName);
492
+    if (step != 0)
493
+	errAbort("Extra step= setting line %d of %s\n", lf->lineIx, lf->fileName);
494
+    parseVariableStepSection(lf, clipDontDie, lm, itemsPerSlot, 
495
+    	chrom, chromSize, span, pSectionList);
496
+    }
497
+}
498
+
499
+struct bedGraphChrom
500
+/* A chromosome in bed graph format. */
501
+    {
502
+    struct bedGraphChrom *next;		/* Next in list. */
503
+    char *name;			/* Chromosome name - not allocated here. */
504
+    bits32 size;		/* Chromosome size. */
505
+    struct bwgBedGraphItem *itemList;	/* List of items. */
506
+    };
507
+
508
+static int bedGraphChromCmpName(const void *va, const void *vb)
509
+/* Compare to sort based on query start. */
510
+{
511
+const struct bedGraphChrom *a = *((struct bedGraphChrom **)va);
512
+const struct bedGraphChrom *b = *((struct bedGraphChrom **)vb);
513
+return strcmp(a->name, b->name);
514
+}
515
+
516
+static void parseBedGraphSection(struct lineFile *lf, boolean clipDontDie, 
517
+	struct hash *chromSizeHash, struct lm *lm, 
518
+	int itemsPerSlot, struct bwgSection **pSectionList)
519
+/* Parse out bedGraph section until we get to something that is not in bedGraph format. */
520
+{
521
+/* Set up hash and list to store chromosomes. */
522
+struct hash *chromHash = hashNew(0);
523
+struct bedGraphChrom *chrom, *chromList = NULL;
524
+
525
+/* Collect lines in items on appropriate chromosomes. */
526
+struct bwgBedGraphItem *item;
527
+char *line;
528
+while (lineFileNextReal(lf, &line))
529
+    {
530
+    /* Check for end of section. */
531
+    if (stepTypeLine(line))
532
+        {
533
+	lineFileReuse(lf);
534
+	break;
535
+	}
536
+
537
+    /* Parse out our line and make sure it has exactly 4 columns. */
538
+    char *words[5];
539
+    int wordCount = chopLine(line, words);
540
+    lineFileExpectWords(lf, 4, wordCount);
541
+
542
+    /* Get chromosome. */
543
+    char *chromName = words[0];
544
+    chrom = hashFindVal(chromHash, chromName);
545
+    if (chrom == NULL)
546
+        {
547
+	lmAllocVar(chromHash->lm, chrom);
548
+	hashAddSaveName(chromHash, chromName, chrom, &chrom->name);
549
+	chrom->size = hashIntVal(chromSizeHash, chromName);
550
+	slAddHead(&chromList, chrom);
551
+	}
552
+
553
+    /* Convert to item and add to chromosome list. */
554
+    lmAllocVar(lm, item);
555
+    item->start = lineFileNeedNum(lf, words, 1);
556
+    item->end = lineFileNeedNum(lf, words, 2);
557
+    item->val = lineFileNeedDouble(lf, words, 3);
558
+
559
+    /* Do sanity checking on coordinates. */
560
+    if (item->start > item->end)
561
+        errAbort("bedGraph error: start (%u) after end line (%u) %d of %s.", 
562
+		item->start, item->end, lf->lineIx, lf->fileName);
563
+    if (item->end > chrom->size)
564
+	{
565
+        warn("bedGraph error line %d of %s: chromosome %s has size %u but item ends at %u",
566
+	        lf->lineIx, lf->fileName, chrom->name, chrom->size, item->end);
567
+	if (!clipDontDie)
568
+	    noWarnAbort();
569
+	}
570
+    else
571
+	{
572
+	slAddHead(&chrom->itemList, item);
573
+	}
574
+    }
575
+slSort(&chromList, bedGraphChromCmpName);
576
+
577
+for (chrom = chromList; chrom != NULL; chrom = chrom->next)
578
+    {
579
+    slSort(&chrom->itemList, bwgBedGraphItemCmp);
580
+
581
+    /* Break up into sections of no more than items-per-slot size. */
582
+    struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList;
583
+    for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem)
584
+	{
585
+	/* Find end item of this section, and start item for next section.
586
+	 * Terminate list at end item. */
587
+	int sectionSize = 0;
588
+	int i;
589
+	endItem = startItem;
590
+	for (i=0; i<itemsPerSlot; ++i)
591
+	    {
592
+	    if (nextStartItem == NULL)
593
+		break;
594
+	    endItem = nextStartItem;
595
+	    nextStartItem = nextStartItem->next;
596
+	    ++sectionSize;
597
+	    }
598
+	endItem->next = NULL;
599
+
600
+	/* Fill in section and add it to section list. */
601
+	struct bwgSection *section;
602
+	lmAllocVar(lm, section);
603
+	section->chrom = cloneString(chrom->name);
604
+	section->start = startItem->start;
605
+	section->end = endItem->end;
606
+	section->type = bwgTypeBedGraph;
607
+	section->items.bedGraphList = startItem;
608
+	section->itemCount = sectionSize;
609
+	slAddHead(pSectionList, section);
610
+	}
611
+    }
612
+
613
+/* Free up hash, no longer needed. Free's chromList as a side effect since chromList is in 
614
+ * hash's memory. */
615
+hashFree(&chromHash);
616
+chromList = NULL;
617
+}
618
+
619
+void bwgMakeChromInfo(struct bwgSection *sectionList, struct hash *chromSizeHash,
620
+	int *retChromCount, struct bbiChromInfo **retChromArray,
621
+	int *retMaxChromNameSize)
622
+/* Fill in chromId field in sectionList.  Return array of chromosome name/ids. 
623
+ * The chromSizeHash is keyed by name, and has int values. */
624
+{
625
+/* Build up list of unique chromosome names. */
626
+struct bwgSection *section;
627
+char *chromName = "";
628
+int chromCount = 0;
629
+int maxChromNameSize = 0;
630
+struct slRef *uniq, *uniqList = NULL;
631
+for (section = sectionList; section != NULL; section = section->next)
632
+    {
633
+    if (!sameString(section->chrom, chromName))
634
+        {
635
+	chromName = section->chrom;
636
+	refAdd(&uniqList, chromName);
637
+	++chromCount;
638
+	int len = strlen(chromName);
639
+	if (len > maxChromNameSize)
640
+	    maxChromNameSize = len;
641
+	}
642
+    section->chromId = chromCount-1;
643
+    }
644
+slReverse(&uniqList);
645
+
646
+/* Allocate and fill in results array. */
647
+struct bbiChromInfo *chromArray;
648
+AllocArray(chromArray, chromCount);
649
+int i;
650
+for (i = 0, uniq = uniqList; i < chromCount; ++i, uniq = uniq->next)
651
+    {
652
+    chromArray[i].name = uniq->val;
653
+    chromArray[i].id = i;
654
+    chromArray[i].size = hashIntVal(chromSizeHash, uniq->val);
655
+    }
656
+
657
+/* Clean up, set return values and go home. */
658
+slFreeList(&uniqList);
659
+*retChromCount = chromCount;
660
+*retChromArray = chromArray;
661
+*retMaxChromNameSize = maxChromNameSize;
662
+}
663
+
664
+int bwgAverageResolution(struct bwgSection *sectionList)
665
+/* Return the average resolution seen in sectionList. */
666
+{
667
+if (sectionList == NULL)
668
+    return 1;
669
+bits64 totalRes = 0;
670
+bits32 sectionCount = 0;
671
+struct bwgSection *section;
672
+int i;
673
+for (section = sectionList; section != NULL; section = section->next)
674
+    {
675
+    int sectionRes = 0;
676
+    switch (section->type)
677
+        {
678
+	case bwgTypeBedGraph:
679
+	    {
680
+	    struct bwgBedGraphItem *item;
681
+	    sectionRes = BIGNUM;
682
+	    for (item = section->items.bedGraphList; item != NULL; item = item->next)
683
+		{
684
+		int size = item->end - item->start;
685
+		if (sectionRes > size)
686
+		    sectionRes = size;
687
+		}
688
+	    break;
689
+	    }
690
+	case bwgTypeVariableStep:
691
+	    {
692
+	    struct bwgVariableStepPacked *items = section->items.variableStepPacked, *prev;
693
+	    bits32 smallestGap = BIGNUM;
694
+	    for (i=1; i<section->itemCount; ++i)
695
+	        {
696
+		prev = items;
697
+		items += 1;
698
+		bits32 gap = items->start - prev->start;
699
+		if (smallestGap > gap)
700
+		    smallestGap = gap;
701
+		}
702
+	    if (smallestGap != BIGNUM)
703
+	        sectionRes = smallestGap;
704
+	    else
705
+	        sectionRes = section->itemSpan;
706
+	    break;
707
+	    }
708
+	case bwgTypeFixedStep:
709
+	    {
710
+	    sectionRes = section->itemStep;
711
+	    break;
712
+	    }
713
+	default:
714
+	    internalErr();
715
+	    break;
716
+	}
717
+    totalRes += sectionRes;
718
+    ++sectionCount;
719
+    }
720
+return (totalRes + sectionCount/2)/sectionCount;
721
+}
722
+
723
+#define bwgSectionHeaderSize 24	/* Size of section header in file. */
724
+
725
+static int bwgItemSize(enum bwgSectionType type)
726
+/* Return size of an item inside a particular section. */
727
+{
728
+switch (type)
729
+    {
730
+    case bwgTypeBedGraph:
731
+	return 2*sizeof(bits32) + sizeof(float);
732
+	break;
733
+    case bwgTypeVariableStep:
734
+	return sizeof(bits32) + sizeof(float);
735
+	break;
736
+    case bwgTypeFixedStep:
737
+	return sizeof(float);
738
+	break;
739
+    default:
740
+        internalErr();
741
+	return 0;
742
+    }
743
+}
744
+
745
+static int bwgSectionSize(struct bwgSection *section)
746
+/* Return size (on disk) of section. */
747
+{
748
+return bwgSectionHeaderSize + bwgItemSize(section->type) * section->itemCount;
749
+}
750
+
751
+static bits64 bwgTotalSectionSize(struct bwgSection *sectionList)
752
+/* Return total size of all sections. */
753
+{
754
+bits64 total = 0;
755
+struct bwgSection *section;
756
+for (section = sectionList; section != NULL; section = section->next)
757
+    total += bwgSectionSize(section);
758
+return total;
759
+}
760
+
761
+static void bwgReduceBedGraph(struct bwgSection *section, bits32 chromSize, int reduction, 
762
+	struct bbiSummary **pOutList)
763
+/*Reduce a bedGraph section onto outList. */
764
+{
765
+struct bwgBedGraphItem *item = section->items.bedGraphList;
766
+for (item = section->items.bedGraphList; item != NULL; item = item->next)
767
+    {
768
+    bbiAddRangeToSummary(section->chromId, chromSize, item->start, item->end, 
769
+    	item->val, reduction, pOutList);
770
+    }
771
+}
772
+
773
+static void bwgReduceVariableStep(struct bwgSection *section, bits32 chromSize, int reduction, 
774
+	struct bbiSummary **pOutList)
775
+/*Reduce a variableStep section onto outList. */
776
+{
777
+struct bwgVariableStepPacked *items = section->items.variableStepPacked;
778
+int i;
779
+for (i=0; i<section->itemCount; ++i)
780
+    {
781
+    bbiAddRangeToSummary(section->chromId, chromSize, 
782
+    	items->start, items->start + section->itemSpan, items->val, reduction, pOutList);
783
+    items += 1;
784
+    }
785
+}
786
+
787
+static void bwgReduceFixedStep(struct bwgSection *section, bits32 chromSize, int reduction, 
788
+	struct bbiSummary **pOutList)
789
+/*Reduce a fixedStep section onto outList. */
790
+{
791
+struct bwgFixedStepPacked *items = section->items.fixedStepPacked;
792
+int start = section->start;
793
+int i;
794
+for (i=0; i<section->itemCount; ++i)
795
+    {
796
+    bbiAddRangeToSummary(section->chromId, chromSize, start, start + section->itemSpan, items->val, 
797
+    	reduction, pOutList);
798
+    start += section->itemStep;
799
+    items += 1;
800
+    }
801
+}
802
+
803
+struct bbiSummary *bwgReduceSectionList(struct bwgSection *sectionList, 
804
+	struct bbiChromInfo *chromInfoArray, int reduction)
805
+/* Return summary of section list reduced by given amount. */
806
+{
807
+struct bbiSummary *outList = NULL;
808
+struct bwgSection *section = NULL;
809
+
810
+/* Loop through input section list reducing into outList. */
811
+for (section = sectionList; section != NULL; section = section->next)
812
+    {
813
+    bits32 chromSize = chromInfoArray[section->chromId].size;
814
+    switch (section->type)
815
+        {
816
+	case bwgTypeBedGraph:
817
+	    bwgReduceBedGraph(section, chromSize, reduction, &outList);
818
+	    break;
819
+	case bwgTypeVariableStep:
820
+	    bwgReduceVariableStep(section, chromSize, reduction, &outList);
821
+	    break;
822
+	case bwgTypeFixedStep:
823
+	    bwgReduceFixedStep(section, chromSize, reduction, &outList);
824
+	    break;
825
+	default:
826
+	    internalErr();
827
+	    return 0;
828
+	}
829
+    }
830
+slReverse(&outList);
831
+return outList;
832
+}
833
+
834
+void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, 
835
+               int blockSize, int itemsPerSlot, boolean doCompress,
836
+               char *fileName)
837
+/* Create a bigWig file out of a sorted sectionList. */