git-svn-id: file:///home/git/hedgehog.fhcrc.org/bioconductor/trunk/madman/Rpacks/rtracklayer@57674 bc3139a8-67e5-0310-9ffc-ced21a209358
1 | 1 |
deleted file mode 100644 |
... | ... |
@@ -1,74 +0,0 @@ |
1 |
-/* Stuff that should be in ucsc/bwgInternal.h, but wasn't, so |
|
2 |
- rtracklayer put it here. */ |
|
3 |
- |
|
4 |
-struct bwgBedGraphItem |
|
5 |
-/* An bedGraph-type item in a bwgSection. */ |
|
6 |
-{ |
|
7 |
- struct bwgBedGraphItem *next; /* Next in list. */ |
|
8 |
- bits32 start,end; /* Range of chromosome covered. */ |
|
9 |
- float val; /* Value. */ |
|
10 |
-}; |
|
11 |
- |
|
12 |
-struct bwgVariableStepItem |
|
13 |
-/* An variableStep type item in a bwgSection. */ |
|
14 |
-{ |
|
15 |
- struct bwgVariableStepItem *next; /* Next in list. */ |
|
16 |
- bits32 start; /* Start position in chromosome. */ |
|
17 |
- float val; /* Value. */ |
|
18 |
-}; |
|
19 |
- |
|
20 |
-struct bwgVariableStepPacked |
|
21 |
-/* An variableStep type item in a bwgSection. */ |
|
22 |
-{ |
|
23 |
- bits32 start; /* Start position in chromosome. */ |
|
24 |
- float val; /* Value. */ |
|
25 |
-}; |
|
26 |
- |
|
27 |
-struct bwgFixedStepItem |
|
28 |
-/* An fixedStep type item in a bwgSection. */ |
|
29 |
-{ |
|
30 |
- struct bwgFixedStepItem *next; /* Next in list. */ |
|
31 |
- float val; /* Value. */ |
|
32 |
-}; |
|
33 |
- |
|
34 |
-struct bwgFixedStepPacked |
|
35 |
-/* An fixedStep type item in a bwgSection. */ |
|
36 |
-{ |
|
37 |
- float val; /* Value. */ |
|
38 |
-}; |
|
39 |
- |
|
40 |
-union bwgItem |
|
41 |
-/* Union of item pointers for all possible section types. */ |
|
42 |
-{ |
|
43 |
- struct bwgBedGraphItem *bedGraphList; /* A linked list */ |
|
44 |
- struct bwgFixedStepPacked *fixedStepPacked; /* An array */ |
|
45 |
- struct bwgVariableStepPacked *variableStepPacked; /* An array */ |
|
46 |
- /* No packed format for bedGraph... */ |
|
47 |
-}; |
|
48 |
- |
|
49 |
-struct bwgSection |
|
50 |
-/* A section of a bigWig file - all on same chrom. This is a somewhat fat data |
|
51 |
- * structure used by the bigWig creation code. See also bwgSection for the |
|
52 |
- * structure returned by the bigWig reading code. */ |
|
53 |
-{ |
|
54 |
- struct bwgSection *next; /* Next in list. */ |
|
55 |
- char *chrom; /* Chromosome name. */ |
|
56 |
- bits32 start,end; /* Range of chromosome covered. */ |
|
57 |
- enum bwgSectionType type; |
|
58 |
- union bwgItem items; /* List/array of items in this section. */ |
|
59 |
- bits32 itemStep; /* Step within item if applicable. */ |
|
60 |
- bits32 itemSpan; /* Item span if applicable. */ |
|
61 |
- bits16 itemCount; /* Number of items in section. */ |
|
62 |
- bits32 chromId; /* Unique small integer value for chromosome. */ |
|
63 |
- bits64 fileOffset; /* Offset of section in file. */ |
|
64 |
-}; |
|
65 |
- |
|
66 |
-void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, |
|
67 |
- int blockSize, int itemsPerSlot, boolean doCompress, |
|
68 |
- char *fileName); |
|
69 |
- |
|
70 |
-struct bbiFile *bigWigFileOpen(char *fileName); |
|
71 |
- |
|
72 |
-struct bbiInterval *bigWigIntervalQuery(struct bbiFile *bwf, char *chrom, |
|
73 |
- bits32 start, bits32 end, |
|
74 |
- struct lm *lm); |
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-/* rtracklayer took out this little utility from cheapcgi.c */ |
|
1 |
+/* rtracklayer took out these little utilities from cheapcgi.c */ |
|
2 | 2 |
|
3 | 3 |
#include "common.h" |
4 | 4 |
|
... | ... |
@@ -28,3 +28,49 @@ void cgiDecode(char *in, char *out, int inLength) |
28 | 28 |
} |
29 | 29 |
*out++ = 0; |
30 | 30 |
} |
31 |
+ |
|
32 |
+char *cgiEncode(char *inString) |
|
33 |
+/* Return a cgi-encoded version of inString. |
|
34 |
+ * Alphanumerics kept as is, space translated to plus, |
|
35 |
+ * and all other characters translated to %hexVal. */ |
|
36 |
+{ |
|
37 |
+ char c; |
|
38 |
+ int outSize = 0; |
|
39 |
+ char *outString, *out, *in; |
|
40 |
+ |
|
41 |
+ if (inString == NULL) |
|
42 |
+ return(cloneString("")); |
|
43 |
+ |
|
44 |
+ /* Count up how long it will be */ |
|
45 |
+ in = inString; |
|
46 |
+ while ((c = *in++) != 0) |
|
47 |
+ { |
|
48 |
+ if (isalnum(c) || c == ' ' || c == '.' || c == '_') |
|
49 |
+ outSize += 1; |
|
50 |
+ else |
|
51 |
+ outSize += 3; |
|
52 |
+ } |
|
53 |
+ outString = needMem(outSize+1); |
|
54 |
+ |
|
55 |
+ /* Encode string */ |
|
56 |
+ in = inString; |
|
57 |
+ out = outString; |
|
58 |
+ while ((c = *in++) != 0) |
|
59 |
+ { |
|
60 |
+ if (isalnum(c) || c == '.' || c == '_') |
|
61 |
+ *out++ = c; |
|
62 |
+ else if (c == ' ') |
|
63 |
+ *out++ = '+'; |
|
64 |
+ else |
|
65 |
+ { |
|
66 |
+ unsigned char uc = c; |
|
67 |
+ char buf[4]; |
|
68 |
+ *out++ = '%'; |
|
69 |
+ safef(buf, sizeof(buf), "%02X", uc); |
|
70 |
+ *out++ = buf[0]; |
|
71 |
+ *out++ = buf[1]; |
|
72 |
+ } |
|
73 |
+ } |
|
74 |
+ *out++ = 0; |
|
75 |
+ return outString; |
|
76 |
+} |
... | ... |
@@ -10,47 +10,38 @@ void makeDirsOnPath(char *pathName) |
10 | 10 |
* (It's not considered a problem for the directory to already |
11 | 11 |
* exist. ) */ |
12 | 12 |
{ |
13 |
-/* Save a copy of current directory. */ |
|
14 |
-char *curDir = cloneString(getCurrentDir()); |
|
15 | 13 |
|
16 |
-/* Return current directory. Abort if it fails. */ |
|
17 |
-/* Make local copy of pathName. */ |
|
18 |
-int len = strlen(pathName); |
|
19 |
-char pathCopy[len+1]; |
|
20 |
-strcpy(pathCopy, pathName); |
|
14 |
+ /* shortcut for paths that already exist */ |
|
15 |
+ if (fileExists(pathName)) |
|
16 |
+ return; |
|
21 | 17 |
|
22 |
-/* Start at root if it's an absolute path name. */ |
|
23 |
-char *s = pathCopy, *e; |
|
24 |
-if (pathCopy[0] == '/') |
|
25 |
- { |
|
26 |
-setCurrentDir("/"); |
|
27 |
-++s; |
|
28 |
-} |
|
18 |
+ /* Make local copy of pathName. */ |
|
19 |
+ int len = strlen(pathName); |
|
20 |
+ char pathCopy[len+1]; |
|
21 |
+ strcpy(pathCopy, pathName); |
|
29 | 22 |
|
30 |
-/* Step through it one slash at a time - changing directory if possible |
|
31 |
- * else making directory if possible, else dying. */ |
|
32 |
-for (; !isEmpty(s); s = e) |
|
33 |
- { |
|
34 |
-/* Find end of this section and terminate string there. */ |
|
35 |
-e = strchr(s, '/'); |
|
36 |
-if (e != NULL) |
|
37 |
- *e++ = 0; |
|
23 |
+ /* Tolerate double-slashes in path, everyone else does it. */ |
|
38 | 24 |
|
39 |
-/* Cd there. If that fails mkdir there and then cd there. */ |
|
40 |
-if (!maybeSetCurrentDir(s)) |
|
41 |
- { |
|
42 |
-if (!makeDir(s)) |
|
43 |
- { |
|
44 |
-break; |
|
45 |
-} |
|
46 |
-setCurrentDir(s); |
|
47 |
-} |
|
48 |
-} |
|
49 |
-setCurrentDir(curDir); |
|
50 |
-freeMem(curDir); |
|
25 |
+ /* Start at root if it's an absolute path name. */ |
|
26 |
+ char *s = pathCopy, *e; |
|
27 |
+ while (*s++ == '/') |
|
28 |
+ /* do nothing */; |
|
29 |
+ |
|
30 |
+ /* Step through it one slash at a time |
|
31 |
+ * making directory if possible, else dying. */ |
|
32 |
+ for (; !isEmpty(s); s = e) |
|
33 |
+ { |
|
34 |
+ /* Find end of this section and terminate string there. */ |
|
35 |
+ e = strchr(s, '/'); |
|
36 |
+ if (e != NULL) |
|
37 |
+ *e = 0; |
|
38 |
+ makeDir(pathCopy); |
|
39 |
+ if (e != NULL) |
|
40 |
+ *e++ = '/'; |
|
41 |
+ } |
|
51 | 42 |
} |
52 | 43 |
|
53 |
-/* some shared utilities */ |
|
44 |
+/* some shared utilities (osunix.c) */ |
|
54 | 45 |
|
55 | 46 |
struct fileInfo *newFileInfo(char *name, off_t size, bool isDir, int statErrno, |
56 | 47 |
time_t lastAccess) |
... | ... |
@@ -49,13 +49,14 @@ |
49 | 49 |
* sumData 4 bytes float |
50 | 50 |
* sumSquares 4 bytes float |
51 | 51 |
* zoom index cirTree index |
52 |
+ * magic# 4 bytes - same as magic number at start of header |
|
52 | 53 |
*/ |
53 | 54 |
|
54 | 55 |
#ifndef CIRTREE_H |
55 | 56 |
#include "cirTree.h" |
56 | 57 |
#endif |
57 | 58 |
|
58 |
-#define bbiCurrentVersion 3 |
|
59 |
+#define bbiCurrentVersion 4 |
|
59 | 60 |
/* Version history (of file format, not utilities - corresponds to version field in header) |
60 | 61 |
* 1 - Initial release |
61 | 62 |
* 1 - Unfortunately when attempting a transparent change to encoders, made the sectionCount |
... | ... |
@@ -66,6 +67,8 @@ |
66 | 67 |
* zoom in files made by bedToBigBed and bedGraphToBigWig. (The older wigToBigWig was fine.) |
67 | 68 |
* Added totalSummary section. |
68 | 69 |
* 3 - Adding zlib compression. Only active if uncompressBufSize is non-zero in header. |
70 |
+ * 4 - Fixed problem in encoder for the max field in zoom levels higher than the first one. |
|
71 |
+ * Added an extra sig at end of file. |
|
69 | 72 |
*/ |
70 | 73 |
|
71 | 74 |
struct bbiZoomLevel |
... | ... |
@@ -349,4 +352,10 @@ bits64 bbiWriteSummaryAndIndex(struct bbiSummary *summaryList, |
349 | 352 |
/* Write out summary and index to summary, returning start position of |
350 | 353 |
* summary index. */ |
351 | 354 |
|
355 |
+boolean bbiFileCheckSigs(char *fileName, bits32 sig, char *typeName); |
|
356 |
+/* check file signatures at beginning and end of file */ |
|
357 |
+ |
|
358 |
+time_t bbiUpdateTime(struct bbiFile *bbi); |
|
359 |
+/* return bbi->udc->updateTime */ |
|
360 |
+ |
|
352 | 361 |
#endif /* BBIFILE_H */ |
... | ... |
@@ -37,6 +37,44 @@ for (level = levelList; level != NULL; level = level->next) |
37 | 37 |
return closestLevel; |
38 | 38 |
} |
39 | 39 |
|
40 |
+boolean bbiFileCheckSigs(char *fileName, bits32 sig, char *typeName) |
|
41 |
+/* check file signatures at beginning and end of file */ |
|
42 |
+{ |
|
43 |
+int fd = mustOpenFd(fileName, O_RDONLY); |
|
44 |
+bits32 magic; |
|
45 |
+boolean isSwapped = FALSE; |
|
46 |
+ |
|
47 |
+// look for signature at the beginning of the file |
|
48 |
+mustReadFd(fd, &magic, sizeof(magic)); |
|
49 |
+ |
|
50 |
+if (magic != sig) |
|
51 |
+ { |
|
52 |
+ magic = byteSwap32(magic); |
|
53 |
+ isSwapped = TRUE; |
|
54 |
+ if (magic != sig) |
|
55 |
+ return FALSE; |
|
56 |
+ } |
|
57 |
+ |
|
58 |
+// look for signature at the end of the file |
|
59 |
+mustLseek(fd, -sizeof(magic), SEEK_END); |
|
60 |
+mustReadFd(fd, &magic, sizeof(magic)); |
|
61 |
+mustCloseFd(&fd); |
|
62 |
+ |
|
63 |
+if (isSwapped) |
|
64 |
+ { |
|
65 |
+ magic = byteSwap32(magic); |
|
66 |
+ if (magic != sig) |
|
67 |
+ return FALSE; |
|
68 |
+ } |
|
69 |
+else |
|
70 |
+ { |
|
71 |
+ if (magic != sig) |
|
72 |
+ return FALSE; |
|
73 |
+ } |
|
74 |
+ |
|
75 |
+return TRUE; |
|
76 |
+} |
|
77 |
+ |
|
40 | 78 |
struct bbiFile *bbiFileOpen(char *fileName, bits32 sig, char *typeName) |
41 | 79 |
/* Open up big wig or big bed file. */ |
42 | 80 |
{ |
... | ... |
@@ -551,11 +589,11 @@ if (start >= end) |
551 | 589 |
return result; |
552 | 590 |
bzero(summary, summarySize * sizeof(summary[0])); |
553 | 591 |
|
554 |
-/* Figure out what size of data we want. We actually want to get 4 data points per summary |
|
592 |
+/* Figure out what size of data we want. We actually want to get 2 data points per summary |
|
555 | 593 |
* value if possible to minimize the effect of a data point being split between summary pixels. */ |
556 | 594 |
bits32 baseSize = end - start; |
557 | 595 |
int fullReduction = (baseSize/summarySize); |
558 |
-int zoomLevel = fullReduction/4; |
|
596 |
+int zoomLevel = fullReduction/2; |
|
559 | 597 |
if (zoomLevel < 0) |
560 | 598 |
zoomLevel = 0; |
561 | 599 |
|
... | ... |
@@ -692,3 +730,9 @@ else if (bbi->version == 1) |
692 | 730 |
return res; |
693 | 731 |
} |
694 | 732 |
|
733 |
+time_t bbiUpdateTime(struct bbiFile *bbi) |
|
734 |
+/* return bbi->udc->updateTime */ |
|
735 |
+{ |
|
736 |
+struct udcFile *udc = bbi->udc; |
|
737 |
+return udcUpdateTime(udc); |
|
738 |
+} |
... | ... |
@@ -7,6 +7,7 @@ |
7 | 7 |
#include "cirTree.h" |
8 | 8 |
#include "bPlusTree.h" |
9 | 9 |
#include "bbiFile.h" |
10 |
+#include "obscure.h" |
|
10 | 11 |
|
11 | 12 |
void bbiWriteDummyHeader(FILE *f) |
12 | 13 |
/* Write out all-zero header, just to reserve space for it. */ |
... | ... |
@@ -30,6 +31,16 @@ writeOne(f, sum->sumData); |
30 | 31 |
writeOne(f, sum->sumSquares); |
31 | 32 |
} |
32 | 33 |
|
34 |
+static int bbiChromInfoCmp(const void *va, const void *vb) |
|
35 |
+/* Sort bbiChromInfo. Unlike most of our sorts this is single rather |
|
36 |
+ * than double indirect. */ |
|
37 |
+{ |
|
38 |
+const struct bbiChromInfo *a = (const struct bbiChromInfo *)va; |
|
39 |
+const struct bbiChromInfo *b = (const struct bbiChromInfo *)vb; |
|
40 |
+return strcmp(a->name, b->name); |
|
41 |
+} |
|
42 |
+ |
|
43 |
+ |
|
33 | 44 |
void bbiWriteChromInfo(struct bbiChromUsage *usageList, int blockSize, FILE *f) |
34 | 45 |
/* Write out information on chromosomes to file. */ |
35 | 46 |
{ |
... | ... |
@@ -52,6 +63,9 @@ for (i=0, usage = usageList; i<chromCount; ++i, usage = usage->next) |
52 | 63 |
chromInfoArray[i].size = usage->size; |
53 | 64 |
} |
54 | 65 |
|
66 |
+/* Sort so the b-Tree actually works. */ |
|
67 |
+qsort(chromInfoArray, chromCount, sizeof(chromInfoArray[0]), bbiChromInfoCmp); |
|
68 |
+ |
|
55 | 69 |
/* Write chromosome bPlusTree */ |
56 | 70 |
int chromBlockSize = min(blockSize, chromCount); |
57 | 71 |
bptFileBulkIndexToOpenFile(chromInfoArray, sizeof(chromInfoArray[0]), chromCount, chromBlockSize, |
... | ... |
@@ -130,6 +144,9 @@ int lastStart = -1; |
130 | 144 |
bits32 id = 0; |
131 | 145 |
bits64 totalBases = 0, bedCount = 0; |
132 | 146 |
int minDiff = BIGNUM; |
147 |
+ |
|
148 |
+lineFileRemoveInitialCustomTrackLines(lf); |
|
149 |
+ |
|
133 | 150 |
for (;;) |
134 | 151 |
{ |
135 | 152 |
int rowSize = lineFileChopNext(lf, row, ArraySize(row)); |
... | ... |
@@ -139,6 +156,16 @@ for (;;) |
139 | 156 |
char *chrom = row[0]; |
140 | 157 |
int start = lineFileNeedNum(lf, row, 1); |
141 | 158 |
int end = lineFileNeedNum(lf, row, 2); |
159 |
+ if (start >= end) |
|
160 |
+ { |
|
161 |
+ if (start == end) |
|
162 |
+ errAbort("line %d of %s: start and end coordinates the same\n" |
|
163 |
+ "They need to be at least one apart" |
|
164 |
+ , lf->lineIx, lf->fileName); |
|
165 |
+ else |
|
166 |
+ errAbort("end (%d) before start (%d) line %d of %s", |
|
167 |
+ end, start, lf->lineIx, lf->fileName); |
|
168 |
+ } |
|
142 | 169 |
++bedCount; |
143 | 170 |
totalBases += (end - start); |
144 | 171 |
if (usage == NULL || differentString(usage->name, chrom)) |
... | ... |
@@ -149,10 +176,14 @@ for (;;) |
149 | 176 |
lf->fileName, lf->lineIx); |
150 | 177 |
} |
151 | 178 |
hashAdd(uniqHash, chrom, NULL); |
179 |
+ struct hashEl *chromHashEl = hashLookup(chromSizesHash, chrom); |
|
180 |
+ if (chromHashEl == NULL) |
|
181 |
+ errAbort("%s is not found in chromosome sizes file", chrom); |
|
182 |
+ int chromSize = ptToInt(chromHashEl->val); |
|
152 | 183 |
AllocVar(usage); |
153 | 184 |
usage->name = cloneString(chrom); |
154 | 185 |
usage->id = id++; |
155 |
- usage->size = hashIntVal(chromSizesHash, chrom); |
|
186 |
+ usage->size = chromSize; |
|
156 | 187 |
slAddHead(&usageList, usage); |
157 | 188 |
lastStart = -1; |
158 | 189 |
} |
... | ... |
@@ -222,7 +253,7 @@ while (start < end) |
222 | 253 |
newSum->minVal = minVal; |
223 | 254 |
newSum->maxVal = maxVal; |
224 | 255 |
sum = newSum; |
225 |
- slAddHead(pOutList, sum); |
|
256 |
+ slAddHead(pOutList, sum); |
|
226 | 257 |
} |
227 | 258 |
|
228 | 259 |
/* Figure out amount of overlap between current summary and item */ |
... | ... |
@@ -519,7 +550,7 @@ else |
519 | 550 |
twiceReduced->end = sum->end; |
520 | 551 |
twiceReduced->validCount += sum->validCount; |
521 | 552 |
if (sum->minVal < twiceReduced->minVal) twiceReduced->minVal = sum->minVal; |
522 |
- if (sum->maxVal < twiceReduced->maxVal) twiceReduced->maxVal = sum->maxVal; |
|
553 |
+ if (sum->maxVal > twiceReduced->maxVal) twiceReduced->maxVal = sum->maxVal; |
|
523 | 554 |
twiceReduced->sumData += sum->sumData; |
524 | 555 |
twiceReduced->sumSquares += sum->sumSquares; |
525 | 556 |
} |
... | ... |
@@ -68,35 +68,8 @@ char *bigBedAutoSqlText(struct bbiFile *bbi); |
68 | 68 |
struct asObject *bigBedAs(struct bbiFile *bbi); |
69 | 69 |
/* Get autoSql object definition if any associated with file. */ |
70 | 70 |
|
71 |
-void bigBedFileCreate( |
|
72 |
- char *inName, /* Input file in a tabular bed format <chrom><start><end> + whatever. */ |
|
73 |
- char *chromSizes, /* Two column tab-separated file: <chromosome> <size>. */ |
|
74 |
- int blockSize, /* Number of items to bundle in r-tree. 1024 is good. */ |
|
75 |
- int itemsPerSlot, /* Number of items in lowest level of tree. 64 is good. */ |
|
76 |
- bits16 definedFieldCount, /* Number of defined bed fields - 3-16 or so. 0 means all fields |
|
77 |
- * are the defined bed ones. */ |
|
78 |
- char *asFileName, /* If non-null points to a .as file that describes fields. */ |
|
79 |
- boolean clip, /* If set silently clip out of bound coordinates. */ |
|
80 |
- char *outName); /* BigBed output file name. */ |
|
81 |
-/* Convert tab-separated bed file to binary indexed, zoomed bigBed version. */ |
|
82 |
- |
|
83 |
-void bigBedFileCreateDetailed( |
|
84 |
- struct ppBed *pbList, /* Input bed data. Must be sorted. */ |
|
85 |
- bits64 pbCount, /* size of input pbList */ |
|
86 |
- double pbAverageSize, /* average size of elements in pbList */ |
|
87 |
- char *inName, /* Input file name (for error message reporting) */ |
|
88 |
- struct hash *chromHash, /* Hash containing sizes of all chroms. */ |
|
89 |
- int blockSize, /* Number of items to bundle in r-tree. 1024 is good. */ |
|
90 |
- int itemsPerSlot, /* Number of items in lowest level of tree. 64 is good. */ |
|
91 |
- bits16 definedFieldCount, /* Number of defined bed fields - 3-16 or so. 0 means all fields |
|
92 |
- * are the defined bed ones. */ |
|
93 |
- bits16 fieldCount, /* actual field count from input data. */ |
|
94 |
- char *asFileName, /* If non-null points to a .as file that describes fields. */ |
|
95 |
- struct asObject *as, /* If non-null contains as object that describes fields. */ |
|
96 |
- bits64 fullSize, /* full size of ppBed on disk */ |
|
97 |
- char *outName); /* BigBed output file name. */ |
|
98 |
-/* create zoomed bigBed version from ppBed list. */ |
|
99 |
- |
|
71 |
+boolean bigBedFileCheckSigs(char *fileName); |
|
72 |
+/* check file signatures at beginning and end of file */ |
|
100 | 73 |
|
101 | 74 |
#endif /* BIGBED_H */ |
102 | 75 |
|
... | ... |
@@ -23,6 +23,9 @@ |
23 | 23 |
#include "bbiFile.h" |
24 | 24 |
#endif |
25 | 25 |
|
26 |
+#ifndef BITS_H |
|
27 |
+#include "bits.h" |
|
28 |
+#endif |
|
26 | 29 |
|
27 | 30 |
void bigWigFileCreate( |
28 | 31 |
char *inName, /* Input file in ascii wiggle format. */ |
... | ... |
@@ -66,5 +69,44 @@ double bigWigSingleSummary(struct bbiFile *bwf, char *chrom, int start, int end, |
66 | 69 |
enum bbiSummaryType summaryType, double defaultVal); |
67 | 70 |
/* Return the summarized single value for a range. */ |
68 | 71 |
|
72 |
+boolean isBigWig(char *fileName); |
|
73 |
+/* Peek at a file to see if it's bigWig */ |
|
74 |
+ |
|
75 |
+boolean bigWigFileCheckSigs(char *fileName); |
|
76 |
+/* check file signatures at beginning and end of file */ |
|
77 |
+ |
|
78 |
+/* bigWigValsOnChrom - a little system for optimizing bigWig use when doing a pass over the |
|
79 |
+ * whole chromosome. How it is used typically is: |
|
80 |
+ * struct bigWigValsOnChrom *chromVals = bigWigValsOnChromNew(); |
|
81 |
+ * for (chrom = chromList; chrom != NULL; chrom = chrom->next) |
|
82 |
+ * { |
|
83 |
+ * if (bigWigValsOnChromFetchData(chromVals, chrom->name, bigWig)) |
|
84 |
+ * // do stuff using the valBuf, or covBuf fields which have |
|
85 |
+ * // the big wig data unpacked into them. Can use chromSize and chrom too |
|
86 |
+ * } |
|
87 |
+ * bigWigValsOnChromFree(&chromVals); */ |
|
88 |
+ |
|
89 |
+struct bigWigValsOnChrom |
|
90 |
+/* Object for bulk access, one chromosome at a time. This is faster than |
|
91 |
+ * doing bigWigInterval queries when you have ~3000 or more queries. */ |
|
92 |
+ { |
|
93 |
+ struct bigWigValsOnChrom *next; |
|
94 |
+ char *chrom; /* Current chromosome. */ |
|
95 |
+ long chromSize; /* Size of current chromosome. */ |
|
96 |
+ long bufSize; /* Size of allocated buffer */ |
|
97 |
+ double *valBuf; /* A value for each base on chrom. Zero where no data. */ |
|
98 |
+ Bits *covBuf; /* A bit for each base with data. */ |
|
99 |
+ }; |
|
100 |
+ |
|
101 |
+struct bigWigValsOnChrom *bigWigValsOnChromNew(); |
|
102 |
+/* Allocate new empty bigWigValsOnChrom structure. */ |
|
103 |
+ |
|
104 |
+void bigWigValsOnChromFree(struct bigWigValsOnChrom **pChromVals); |
|
105 |
+/* Free up bigWigValsOnChrom */ |
|
106 |
+ |
|
107 |
+boolean bigWigValsOnChromFetchData(struct bigWigValsOnChrom *chromVals, char *chrom, |
|
108 |
+ struct bbiFile *bigWig); |
|
109 |
+/* Fetch data for chromosome from bigWig. Returns FALSE if no data on that chrom. */ |
|
110 |
+ |
|
69 | 111 |
#endif /* BIGWIG_H */ |
70 | 112 |
|
... | ... |
@@ -15,70 +15,7 @@ |
15 | 15 |
#include "bwgInternal.h" |
16 | 16 |
#include "bigWig.h" |
17 | 17 |
|
18 |
-static char const rcsid[] = "$Id: bwgCreate.c,v 1.22 2009/11/25 07:17:25 kent Exp $"; |
|
19 |
- |
|
20 |
-struct bwgBedGraphItem |
|
21 |
-/* An bedGraph-type item in a bwgSection. */ |
|
22 |
- { |
|
23 |
- struct bwgBedGraphItem *next; /* Next in list. */ |
|
24 |
- bits32 start,end; /* Range of chromosome covered. */ |
|
25 |
- float val; /* Value. */ |
|
26 |
- }; |
|
27 |
- |
|
28 |
-struct bwgVariableStepItem |
|
29 |
-/* An variableStep type item in a bwgSection. */ |
|
30 |
- { |
|
31 |
- struct bwgVariableStepItem *next; /* Next in list. */ |
|
32 |
- bits32 start; /* Start position in chromosome. */ |
|
33 |
- float val; /* Value. */ |
|
34 |
- }; |
|
35 |
- |
|
36 |
-struct bwgVariableStepPacked |
|
37 |
-/* An variableStep type item in a bwgSection. */ |
|
38 |
- { |
|
39 |
- bits32 start; /* Start position in chromosome. */ |
|
40 |
- float val; /* Value. */ |
|
41 |
- }; |
|
42 |
- |
|
43 |
-struct bwgFixedStepItem |
|
44 |
-/* An fixedStep type item in a bwgSection. */ |
|
45 |
- { |
|
46 |
- struct bwgFixedStepItem *next; /* Next in list. */ |
|
47 |
- float val; /* Value. */ |
|
48 |
- }; |
|
49 |
- |
|
50 |
-struct bwgFixedStepPacked |
|
51 |
-/* An fixedStep type item in a bwgSection. */ |
|
52 |
- { |
|
53 |
- float val; /* Value. */ |
|
54 |
- }; |
|
55 |
- |
|
56 |
-union bwgItem |
|
57 |
-/* Union of item pointers for all possible section types. */ |
|
58 |
- { |
|
59 |
- struct bwgBedGraphItem *bedGraphList; /* A linked list */ |
|
60 |
- struct bwgFixedStepPacked *fixedStepPacked; /* An array */ |
|
61 |
- struct bwgVariableStepPacked *variableStepPacked; /* An array */ |
|
62 |
- /* No packed format for bedGraph... */ |
|
63 |
- }; |
|
64 |
- |
|
65 |
-struct bwgSection |
|
66 |
-/* A section of a bigWig file - all on same chrom. This is a somewhat fat data |
|
67 |
- * structure used by the bigWig creation code. See also bwgSection for the |
|
68 |
- * structure returned by the bigWig reading code. */ |
|
69 |
- { |
|
70 |
- struct bwgSection *next; /* Next in list. */ |
|
71 |
- char *chrom; /* Chromosome name. */ |
|
72 |
- bits32 start,end; /* Range of chromosome covered. */ |
|
73 |
- enum bwgSectionType type; |
|
74 |
- union bwgItem items; /* List/array of items in this section. */ |
|
75 |
- bits32 itemStep; /* Step within item if applicable. */ |
|
76 |
- bits32 itemSpan; /* Item span if applicable. */ |
|
77 |
- bits16 itemCount; /* Number of items in section. */ |
|
78 |
- bits32 chromId; /* Unique small integer value for chromosome. */ |
|
79 |
- bits64 fileOffset; /* Offset of section in file. */ |
|
80 |
- }; |
|
81 |
- |
|
18 |
+static char const rcsid[] = "$Id: bwgCreate.c,v 1.27 2010/06/10 20:13:29 braney Exp $"; |
|
82 | 19 |
|
83 | 20 |
static int bwgBedGraphItemCmp(const void *va, const void *vb) |
84 | 21 |
/* Compare to sort based on query start. */ |
... | ... |
@@ -200,7 +137,7 @@ return bufSize; |
200 | 137 |
} |
201 | 138 |
|
202 | 139 |
|
203 |
-static int bwgSectionCmp(const void *va, const void *vb) |
|
140 |
+int bwgSectionCmp(const void *va, const void *vb) |
|
204 | 141 |
/* Compare to sort based on chrom,start,end. */ |
205 | 142 |
{ |
206 | 143 |
const struct bwgSection *a = *((struct bwgSection **)va); |
... | ... |
@@ -338,7 +275,7 @@ struct lm *lmLocal = lmInit(0); |
338 | 275 |
* adding values from single column to list. */ |
339 | 276 |
char *words[2]; |
340 | 277 |
char *line; |
341 |
-struct bwgVariableStepItem *item, *itemList = NULL; |
|
278 |
+struct bwgVariableStepItem *item, *nextItem, *itemList = NULL; |
|
342 | 279 |
int originalSectionSize = 0; |
343 | 280 |
while (lineFileNextReal(lf, &line)) |
344 | 281 |
{ |
... | ... |
@@ -372,6 +309,20 @@ while (lineFileNextReal(lf, &line)) |
372 | 309 |
} |
373 | 310 |
slSort(&itemList, bwgVariableStepItemCmp); |
374 | 311 |
|
312 |
+/* Make sure no overlap between items. */ |
|
313 |
+if (itemList != NULL) |
|
314 |
+ { |
|
315 |
+ item = itemList; |
|
316 |
+ for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next) |
|
317 |
+ { |
|
318 |
+ if (item->start + span > nextItem->start) |
|
319 |
+ errAbort("Overlap on %s between items starting at %d and %d.\n" |
|
320 |
+ "Please remove overlaps and try again", |
|
321 |
+ chrom, item->start, nextItem->start); |
|
322 |
+ item = nextItem; |
|
323 |
+ } |
|
324 |
+ } |
|
325 |
+ |
|
375 | 326 |
/* Break up into sections of no more than items-per-slot size. */ |
376 | 327 |
int sizeLeft = originalSectionSize; |
377 | 328 |
for (item = itemList; item != NULL; ) |
... | ... |
@@ -435,7 +386,7 @@ else |
435 | 386 |
errAbort("Unknown type %s\n", typeWord); |
436 | 387 |
|
437 | 388 |
/* Set up defaults for values we hope to parse out of rest of line. */ |
438 |
-int span = 1; |
|
389 |
+int span = 0; |
|
439 | 390 |
bits32 step = 0; |
440 | 391 |
bits32 start = 0; |
441 | 392 |
char *chrom = NULL; |
... | ... |
@@ -468,8 +419,8 @@ while ((varEqVal = nextWord(&initialLine)) != NULL) |
468 | 419 |
* rest of section. */ |
469 | 420 |
if (chrom == NULL) |
470 | 421 |
errAbort("Missing chrom= setting line %d of %s\n", lf->lineIx, lf->fileName); |
471 |
-bits32 chromSize = hashIntVal(chromSizeHash, chrom); |
|
472 |
-if (start >= chromSize) |
|
422 |
+bits32 chromSize = (chromSizeHash ? hashIntVal(chromSizeHash, chrom) : BIGNUM); |
|
423 |
+if (start > chromSize) |
|
473 | 424 |
{ |
474 | 425 |
warn("line %d of %s: chromosome %s has %u bases, but item starts at %u", |
475 | 426 |
lf->lineIx, lf->fileName, chrom, chromSize, start); |
... | ... |
@@ -482,6 +433,8 @@ if (type == bwgTypeFixedStep) |
482 | 433 |
errAbort("Missing start= setting line %d of %s\n", lf->lineIx, lf->fileName); |
483 | 434 |
if (step == 0) |
484 | 435 |
errAbort("Missing step= setting line %d of %s\n", lf->lineIx, lf->fileName); |
436 |
+ if (span == 0) |
|
437 |
+ span = step; |
|
485 | 438 |
parseFixedStepSection(lf, clipDontDie, lm, itemsPerSlot, |
486 | 439 |
chrom, chromSize, span, start-1, step, pSectionList); |
487 | 440 |
} |
... | ... |
@@ -491,6 +444,8 @@ else |
491 | 444 |
errAbort("Extra start= setting line %d of %s\n", lf->lineIx, lf->fileName); |
492 | 445 |
if (step != 0) |
493 | 446 |
errAbort("Extra step= setting line %d of %s\n", lf->lineIx, lf->fileName); |
447 |
+ if (span == 0) |
|
448 |
+ span = 1; |
|
494 | 449 |
parseVariableStepSection(lf, clipDontDie, lm, itemsPerSlot, |
495 | 450 |
chrom, chromSize, span, pSectionList); |
496 | 451 |
} |
... | ... |
@@ -546,7 +501,7 @@ while (lineFileNextReal(lf, &line)) |
546 | 501 |
{ |
547 | 502 |
lmAllocVar(chromHash->lm, chrom); |
548 | 503 |
hashAddSaveName(chromHash, chromName, chrom, &chrom->name); |
549 |
- chrom->size = hashIntVal(chromSizeHash, chromName); |
|
504 |
+ chrom->size = (chromSizeHash ? hashIntVal(chromSizeHash, chromName) : BIGNUM); |
|
550 | 505 |
slAddHead(&chromList, chrom); |
551 | 506 |
} |
552 | 507 |
|
... | ... |
@@ -574,10 +529,22 @@ while (lineFileNextReal(lf, &line)) |
574 | 529 |
} |
575 | 530 |
slSort(&chromList, bedGraphChromCmpName); |
576 | 531 |
|
532 |
+/* Loop through each chromosome and output the item list, broken into sections |
|
533 |
+ * for that chrom. */ |
|
577 | 534 |
for (chrom = chromList; chrom != NULL; chrom = chrom->next) |
578 | 535 |
{ |
579 | 536 |
slSort(&chrom->itemList, bwgBedGraphItemCmp); |
580 | 537 |
|
538 |
+ /* Check to make sure no overlap between items. */ |
|
539 |
+ struct bwgBedGraphItem *item = chrom->itemList, *nextItem; |
|
540 |
+ for (nextItem = item->next; nextItem != NULL; nextItem = nextItem->next) |
|
541 |
+ { |
|
542 |
+ if (item->end > nextItem->start) |
|
543 |
+ errAbort("Overlap between %s %d %d and %s %d %d.\nPlease remove overlaps and try again", |
|
544 |
+ chrom->name, item->start, item->end, chrom->name, nextItem->start, nextItem->end); |
|
545 |
+ item = nextItem; |
|
546 |
+ } |
|
547 |
+ |
|
581 | 548 |
/* Break up into sections of no more than items-per-slot size. */ |
582 | 549 |
struct bwgBedGraphItem *startItem, *endItem, *nextStartItem = chrom->itemList; |
583 | 550 |
for (startItem = chrom->itemList; startItem != NULL; startItem = nextStartItem) |
... | ... |
@@ -832,8 +799,7 @@ return outList; |
832 | 799 |
} |
833 | 800 |
|
834 | 801 |
void bwgCreate(struct bwgSection *sectionList, struct hash *chromSizeHash, |
835 |
- int blockSize, int itemsPerSlot, boolean doCompress, |
|
836 |
- char *fileName) |
|
802 |
+ int blockSize, int itemsPerSlot, boolean doCompress, char *fileName) |
|
837 | 803 |
/* Create a bigWig file out of a sorted sectionList. */ |
838 | 804 |
{ |
839 | 805 |
bits64 sectionCount = slCount(sectionList); |
... | ... |
@@ -862,12 +828,12 @@ struct bbiChromInfo *chromInfoArray; |
862 | 828 |
int chromCount, maxChromNameSize; |
863 | 829 |
bwgMakeChromInfo(sectionList, chromSizeHash, &chromCount, &chromInfoArray, &maxChromNameSize); |
864 | 830 |
|
865 |
-/* Figure out initial summary level - starting with a summary 20 times the amount |
|
831 |
+/* Figure out initial summary level - starting with a summary 10 times the amount |
|
866 | 832 |
* of the smallest item. See if summarized data is smaller than half input data, if |
867 | 833 |
* not bump up reduction by a factor of 2 until it is, or until further summarizing |
868 | 834 |
* yields no size reduction. */ |
869 | 835 |
int minRes = bwgAverageResolution(sectionList); |
870 |
-int initialReduction = minRes*20; |
|
836 |
+int initialReduction = minRes*10; |
|
871 | 837 |
bits64 fullSize = bwgTotalSectionSize(sectionList); |
872 | 838 |
bits64 maxReducedSize = fullSize/2; |
873 | 839 |
struct bbiSummary *firstSummaryList = NULL, *summaryList = NULL; |
... | ... |
@@ -878,8 +844,7 @@ for (;;) |
878 | 844 |
bits64 summarySize = bbiTotalSummarySize(summaryList); |
879 | 845 |
if (doCompress) |
880 | 846 |
{ |
881 |
- summarySize *= 4; // Compensate for summary not compressing as well as primary data |
|
882 |
- initialReduction *= 4; |
|
847 |
+ summarySize *= 2; // Compensate for summary not compressing as well as primary data |
|
883 | 848 |
} |
884 | 849 |
if (summarySize >= maxReducedSize && summarySize != lastSummarySize) |
885 | 850 |
{ |
... | ... |
@@ -1049,18 +1014,30 @@ for (i=0; i<summaryCount; ++i) |
1049 | 1014 |
writeOne(f, reductionIndexOffsets[i]); |
1050 | 1015 |
} |
1051 | 1016 |
|
1017 |
+/* Write end signature. */ |
|
1018 |
+fseek(f, 0L, SEEK_END); |
|
1019 |
+writeOne(f, sig); |
|
1020 |
+ |
|
1052 | 1021 |
/* Clean up */ |
1053 | 1022 |
freez(&chromInfoArray); |
1054 | 1023 |
carefulClose(&f); |
1055 | 1024 |
} |
1056 | 1025 |
|
1057 |
-struct bwgSection *bwgParseWig(char *fileName, boolean clipDontDie, struct hash *chromSizeHash, |
|
1058 |
- int maxSectionSize, struct lm *lm) |
|
1026 |
+struct bwgSection *bwgParseWig( |
|
1027 |
+ char *fileName, /* Name of ascii wig file. */ |
|
1028 |
+ boolean clipDontDie, /* Skip items outside chromosome rather than aborting. */ |
|
1029 |
+ struct hash *chromSizeHash, /* If non-NULL items checked to be inside chromosome. */ |
|
1030 |
+ int maxSectionSize, /* Biggest size of a section. 100 - 100,000 is usual range. */ |
|
1031 |
+ struct lm *lm) /* Memory pool to allocate from. */ |
|
1059 | 1032 |
/* Parse out ascii wig file - allocating memory in lm. */ |
1060 | 1033 |
{ |
1061 | 1034 |
struct lineFile *lf = lineFileOpen(fileName, TRUE); |
1062 | 1035 |
char *line; |
1063 | 1036 |
struct bwgSection *sectionList = NULL; |
1037 |
+ |
|
1038 |
+/* remove initial browser and track lines */ |
|
1039 |
+lineFileRemoveInitialCustomTrackLines(lf); |
|
1040 |
+ |
|
1064 | 1041 |
while (lineFileNextReal(lf, &line)) |
1065 | 1042 |
{ |
1066 | 1043 |
verbose(2, "processing %s\n", line); |
... | ... |
@@ -1089,7 +1066,7 @@ while (lineFileNextReal(lf, &line)) |
1089 | 1066 |
} |
1090 | 1067 |
slSort(§ionList, bwgSectionCmp); |
1091 | 1068 |
|
1092 |
-/* Check for overlap. */ |
|
1069 |
+/* Check for overlap at section level. */ |
|
1093 | 1070 |
struct bwgSection *section, *nextSection; |
1094 | 1071 |
for (section = sectionList; section != NULL; section = nextSection) |
1095 | 1072 |
{ |
... | ... |
@@ -14,8 +14,93 @@ enum bwgSectionType |
14 | 14 |
bwgTypeFixedStep=3, |
15 | 15 |
}; |
16 | 16 |
|
17 |
-struct bwgSection *bwgParseWig(char *fileName, boolean clipDontDie, struct hash *chromSizeHash, |
|
18 |
- int maxSectionSize, struct lm *lm); |
|
17 |
+struct bwgBedGraphItem |
|
18 |
+/* A bedGraph-type item in a bwgSection. */ |
|
19 |
+ { |
|
20 |
+ struct bwgBedGraphItem *next; /* Next in list. */ |
|
21 |
+ bits32 start,end; /* Range of chromosome covered. */ |
|
22 |
+ float val; /* Value. */ |
|
23 |
+ }; |
|
24 |
+ |
|
25 |
+struct bwgVariableStepItem |
|
26 |
+/* A variableStep type item in a bwgSection. */ |
|
27 |
+ { |
|
28 |
+ struct bwgVariableStepItem *next; /* Next in list. */ |
|
29 |
+ bits32 start; /* Start position in chromosome. */ |
|
30 |
+ float val; /* Value. */ |
|
31 |
+ }; |
|
32 |
+ |
|
33 |
+struct bwgVariableStepPacked |
|
34 |
+/* A variableStep type item in a bwgSection. */ |
|
35 |
+ { |
|
36 |
+ bits32 start; /* Start position in chromosome. */ |
|
37 |
+ float val; /* Value. */ |
|
38 |
+ }; |
|
39 |
+ |
|
40 |
+struct bwgFixedStepItem |
|
41 |
+/* A fixedStep type item in a bwgSection. */ |
|
42 |
+ { |
|
43 |
+ struct bwgFixedStepItem *next; /* Next in list. */ |
|
44 |
+ float val; /* Value. */ |
|
45 |
+ }; |
|
46 |
+ |
|
47 |
+struct bwgFixedStepPacked |
|
48 |
+/* A fixedStep type item in a bwgSection. */ |
|
49 |
+ { |
|
50 |
+ float val; /* Value. */ |
|
51 |
+ }; |
|
52 |
+ |
|
53 |
+union bwgItem |
|
54 |
+/* Union of item pointers for all possible section types. */ |
|
55 |
+ { |
|
56 |
+ struct bwgBedGraphItem *bedGraphList; /* A linked list */ |
|
57 |
+ struct bwgFixedStepPacked *fixedStepPacked; /* An array */ |
|
58 |
+ struct bwgVariableStepPacked *variableStepPacked; /* An array */ |
|
59 |
+ /* No packed format for bedGraph... */ |
|
60 |
+ }; |
|
61 |
+ |
|
62 |
+struct bwgSection |
|
63 |
+/* A section of a bigWig file - all on same chrom. This is a somewhat fat data |
|
64 |
+ * structure used by the bigWig creation code. See also bwgSection for the |
|
65 |
+ * structure returned by the bigWig reading code. */ |
|
66 |
+ { |
|
67 |
+ struct bwgSection *next; /* Next in list. */ |
|
68 |
+ char *chrom; /* Chromosome name. */ |
|
69 |
+ bits32 start,end; /* Range of chromosome covered. */ |
|
70 |
+ enum bwgSectionType type; |
|
71 |
+ union bwgItem items; /* List/array of items in this section. */ |
|
72 |
+ bits32 itemStep; /* Step within item if applicable. */ |
|
73 |
+ bits32 itemSpan; /* Item span if applicable. */ |
|
74 |
+ bits16 itemCount; /* Number of items in section. */ |
|
75 |
+ bits32 chromId; /* Unique small integer value for chromosome. */ |
|
76 |
+ bits64 fileOffset; /* Offset of section in file. */ |
|
77 |
+ }; |
|
78 |
+ |
|
79 |
+struct bwgSectionHead |
|
80 |
+/* A header from a bigWig file section - similar to above but what is on disk. */ |
|
81 |
+ { |
|
82 |
+ bits32 chromId; /* Chromosome short identifier. */ |
|
83 |
+ bits32 start,end; /* Range covered. */ |
|
84 |
+ bits32 itemStep; /* For some section types, the # of bases between items. */ |
|
85 |
+ bits32 itemSpan; /* For some section types, the # of bases in each item. */ |
|
86 |
+ UBYTE type; /* Type byte. */ |
|
87 |
+ UBYTE reserved; /* Always zero for now. */ |
|
88 |
+ bits16 itemCount; /* Number of items in block. */ |
|
89 |
+ }; |
|
90 |
+ |
|
91 |
+void bwgSectionHeadFromMem(char **pPt, struct bwgSectionHead *head, boolean isSwapped); |
|
92 |
+/* Read section header. */ |
|
93 |
+ |
|
94 |
+ |
|
95 |
+int bwgSectionCmp(const void *va, const void *vb); |
|
96 |
+/* Compare to sort based on chrom,start,end. */ |
|
97 |
+ |
|
98 |
+struct bwgSection *bwgParseWig( |
|
99 |
+ char *fileName, /* Name of ascii wig file. */ |
|
100 |
+ boolean clipDontDie, /* Skip items outside chromosome rather than aborting. */ |
|
101 |
+ struct hash *chromSizeHash, /* If non-NULL items checked to be inside chromosome. */ |
|
102 |
+ int maxSectionSize, /* Biggest size of a section. 100 - 100,000 is usual range. */ |
|
103 |
+ struct lm *lm); /* Memory pool to allocate from. */ |
|
19 | 104 |
/* Parse out ascii wig file - allocating memory in lm. */ |
20 | 105 |
|
21 | 106 |
int bwgAverageResolution(struct bwgSection *sectionList); |
... | ... |
@@ -20,7 +20,7 @@ |
20 | 20 |
#include "bigWig.h" |
21 | 21 |
#include "bigBed.h" |
22 | 22 |
|
23 |
-static char const rcsid[] = "$Id: bwgQuery.c,v 1.23 2009/11/12 23:15:52 kent Exp $"; |
|
23 |
+static char const rcsid[] = "$Id: bwgQuery.c,v 1.24 2010/06/03 18:08:37 kent Exp $"; |
|
24 | 24 |
|
25 | 25 |
struct bbiFile *bigWigFileOpen(char *fileName) |
26 | 26 |
/* Open up big wig file. */ |
... | ... |
@@ -28,17 +28,11 @@ struct bbiFile *bigWigFileOpen(char *fileName) |
28 | 28 |
return bbiFileOpen(fileName, bigWigSig, "big wig"); |
29 | 29 |
} |
30 | 30 |
|
31 |
-struct bwgSectionHead |
|
32 |
-/* A header from a bigWig file section */ |
|
33 |
- { |
|
34 |
- bits32 chromId; /* Chromosome short identifier. */ |
|
35 |
- bits32 start,end; /* Range covered. */ |
|
36 |
- bits32 itemStep; /* For some section types, the # of bases between items. */ |
|
37 |
- bits32 itemSpan; /* For some section types, the # of bases in each item. */ |
|
38 |
- UBYTE type; /* Type byte. */ |
|
39 |
- UBYTE reserved; /* Always zero for now. */ |
|
40 |
- bits16 itemCount; /* Number of items in block. */ |
|
41 |
- }; |
|
31 |
+boolean bigWigFileCheckSigs(char *fileName) |
|
32 |
+/* check file signatures at beginning and end of file */ |
|
33 |
+{ |
|
34 |
+return bbiFileCheckSigs(fileName, bigWigSig, "big wig"); |
|
35 |
+} |
|
42 | 36 |
|
43 | 37 |
#ifdef OLD |
44 | 38 |
static void bwgSectionHeadRead(struct bbiFile *bwf, struct bwgSectionHead *head) |
... | ... |
@@ -57,7 +51,7 @@ head->itemCount = udcReadBits16(udc, isSwapped); |
57 | 51 |
} |
58 | 52 |
#endif /* OLD */ |
59 | 53 |
|
60 |
-static void bwgSectionHeadFromMem(char **pPt, struct bwgSectionHead *head, boolean isSwapped) |
|
54 |
+void bwgSectionHeadFromMem(char **pPt, struct bwgSectionHead *head, boolean isSwapped) |
|
61 | 55 |
/* Read section header. */ |
62 | 56 |
{ |
63 | 57 |
char *pt = *pPt; |
... | ... |
@@ -388,3 +382,16 @@ bigWigSummaryArray(bwf, chrom, start, end, summaryType, 1, &arrayOfOne); |
388 | 382 |
return arrayOfOne; |
389 | 383 |
} |
390 | 384 |
|
385 |
+boolean isBigWig(char *fileName) |
|
386 |
+/* Peek at a file to see if it's bigWig */ |
|
387 |
+{ |
|
388 |
+FILE *f = mustOpen(fileName, "rb"); |
|
389 |
+bits32 sig; |
|
390 |
+mustReadOne(f, sig); |
|
391 |
+fclose(f); |
|
392 |
+if (sig == bigWigSig) |
|
393 |
+ return TRUE; |
|
394 |
+sig = byteSwap32(sig); |
|
395 |
+return sig == bigWigSig; |
|
396 |
+} |
|
397 |
+ |
... | ... |
@@ -17,17 +17,27 @@ |
17 | 17 |
#include "hash.h" |
18 | 18 |
#endif |
19 | 19 |
|
20 |
-#define COLOR_BG_DEFAULT "#FFFEE8" |
|
21 |
-#define COLOR_BG_ALTDEFAULT "#FFF9D2" |
|
22 |
-#define COLOR_BG_GHOST "#EEEEEE" |
|
23 |
-#define COLOR_BG_PALE "#F8F8F8" |
|
24 |
-#define COLOR_DARKGREEN "#008800" |
|
25 |
-#define COLOR_DARKBLUE "#000088" |
|
26 |
-#define COLOR_LTGREY "#CCCCCC" |
|
27 |
-#define COLOR_YELLOW "#FFFF00" |
|
28 |
-#define COLOR_LTYELLOW "#FFF380" |
|
29 |
-#define COLOR_WHITE "#FFFFFF" |
|
30 |
-#define COLOR_RED "#AA0000" |
|
20 |
+#define COLOR_BG_DEFAULT "#FFFEE8" |
|
21 |
+#define COLOR_BG_ALTDEFAULT "#FFF9D2" |
|
22 |
+#define COLOR_BG_DEFAULT_DARKER "#FCECC0" |
|
23 |
+#define COLOR_BG_DEFAULT_DARKEST "#EED5B7" |
|
24 |
+#define COLOR_BG_GHOST "#EEEEEE" |
|
25 |
+#define COLOR_BG_PALE "#F8F8F8" |
|
26 |
+#define COLOR_BG_HEADER_LTBLUE "#D9E4F8" |
|
27 |
+#define COLOR_DARKGREEN "#008800" |
|
28 |
+#define COLOR_LTGREEN "#CCFFCC" |
|
29 |
+#define COLOR_DARKBLUE "#000088" |
|
30 |
+#define COLOR_BLUE_BUTTON "#91B3E6" |
|
31 |
+#define COLOR_DARKGREY "#666666" |
|
32 |
+#define COLOR_LTGREY "#CCCCCC" |
|
33 |
+#define COLOR_YELLOW "#FFFF00" |
|
34 |
+#define COLOR_LTYELLOW "#FFF380" |
|
35 |
+#define COLOR_WHITE "#FFFFFF" |
|
36 |
+#define COLOR_RED "#AA0000" |
|
37 |
+#define COLOR_TRACKLIST_LEVEL1 COLOR_BG_DEFAULT |
|
38 |
+#define COLOR_TRACKLIST_LEVEL2 COLOR_BG_ALTDEFAULT |
|
39 |
+#define COLOR_TRACKLIST_LEVEL3 COLOR_BG_DEFAULT_DARKER |
|
40 |
+#define COLOR_TRACKLIST_LEVEL4 COLOR_BG_DEFAULT_DARKEST |
|
31 | 41 |
|
32 | 42 |
void initSigHandlers(boolean dumpStack); |
33 | 43 |
/* set handler for various terminal signals for logging purposes. |
... | ... |
@@ -67,7 +77,14 @@ char *cgiScriptName(); |
67 | 77 |
/* Return name of script so libs can do context-sensitive stuff. */ |
68 | 78 |
|
69 | 79 |
char *cgiServerName(); |
70 |
-/* Return name of server */ |
|
80 |
+/* Return name of server, better to use cgiServerNamePort() for |
|
81 |
+ actual URL construction */ |
|
82 |
+ |
|
83 |
+char *cgiServerPort(); |
|
84 |
+/* Return port number of server */ |
|
85 |
+ |
|
86 |
+char *cgiServerNamePort(); |
|
87 |
+/* Return name of server with port if different than 80 */ |
|
71 | 88 |
|
72 | 89 |
char *cgiRemoteAddr(); |
73 | 90 |
/* Return IP address of client (or "unknown"). */ |
... | ... |
@@ -75,8 +92,33 @@ char *cgiRemoteAddr(); |
75 | 92 |
char *cgiUserAgent(); |
76 | 93 |
/* Return remote user agent (HTTP_USER_AGENT) or NULL if remote user agent is not known */ |
77 | 94 |
|
95 |
+enum browserType |
|
96 |
+/* Type of client web browser. */ |
|
97 |
+ { |
|
98 |
+ btUnknown=0, // Not yet known |
|
99 |
+ btOpera=1, // Opera |
|
100 |
+ btIE=2, // MS Internet Explorer |
|
101 |
+ btFF=3, // Firefox |
|
102 |
+ btChrome=4, // Google Chrome |
|
103 |
+ btSafari=5, // Safari |
|
104 |
+ btOther=6 // Anything else |
|
105 |
+ }; |
|
106 |
+ |
|
107 |
+enum osType |
|
108 |
+/* Type of client operating system. */ |
|
109 |
+ { |
|
110 |
+ osUnknown=0, // Not yet known |
|
111 |
+ osWindows=1, // The evil empire |
|
112 |
+ osLinux=2, // Workhorse |
|
113 |
+ osMac=3, // Fashion or Religion |
|
114 |
+ osOther=4 // Anything else |
|
115 |
+ }; |
|
116 |
+ |
|
117 |
+enum browserType cgiClientBrowser(char **browserQualifier, enum osType *clientOs, char **clientOsQualifier); |
|
78 | 118 |
/* These routines abort the html output if the input isn't |
79 | 119 |
* there or is misformatted. */ |
120 |
+#define cgiBrowser() cgiClientBrowser(NULL,NULL,NULL) |
|
121 |
+ |
|
80 | 122 |
char *cgiString(char *varName); |
81 | 123 |
int cgiInt(char *varName); |
82 | 124 |
double cgiDouble(char *varName); |
... | ... |
@@ -213,9 +255,9 @@ void cgiMakeCheckBoxJS(char *name, boolean checked, char *javascript); |
213 | 255 |
void cgiMakeCheckBoxIdAndJS(char *name, boolean checked, char *id, char *javascript); |
214 | 256 |
/* Make check box with ID and javascript. */ |
215 | 257 |
|
216 |
-void cgiMakeCheckBox2BoolWithIdAndJS(char *name, boolean checked, boolean enabled,char *id, char *javascript); |
|
217 |
-/* Make check box supporting 2 boolean state: checke/unchecked and enabled/disabled |
|
218 |
- Also support ID and javascript.*/ |
|
258 |
+void cgiMakeCheckBoxFourWay(char *name, boolean checked, boolean enabled, char *id, char *classes, char *moreHtml); |
|
259 |
+/* Make check box - with fourWay functionality (checked/unchecked by enabled/disabled) |
|
260 |
+ * Also makes a shadow hidden variable that supports the 2 boolean states. */ |
|
219 | 261 |
|
220 | 262 |
void cgiMakeTextArea(char *varName, char *initialVal, int rowCount, int columnCount); |
221 | 263 |
/* Make a text area with area rowCount X columnCount and with text: initialVal. */ |
... | ... |
@@ -287,6 +329,19 @@ void cgiMakeDropListWithVals(char *name, char *menu[], char *values[], |
287 | 329 |
void cgiMakeDropListFull(char *name, char *menu[], char *values[], int menuSize, char *checked, char *extraAttribs); |
288 | 330 |
/* Make a drop-down list with names and values. */ |
289 | 331 |
|
332 |
+void cgiDropDownWithTextValsAndExtra(char *name, char *text[], char *values[], |
|
333 |
+ int count, char *selected, char *extra); |
|
334 |
+/* Make a drop-down list with both text and values. */ |
|
335 |
+ |
|
336 |
+char *cgiMakeSelectDropList(boolean multiple, char *name, struct slPair *valsAndLabels,char *selected, char *anyAll,char *extraClasses, char *extraHtml); |
|
337 |
+// Returns allocated string of HTML defining a drop-down select (if multiple, REQUIRES ui-dropdownchecklist.js) |
|
338 |
+// In valsAndLabels, val (pair->name) must be filled in but label (pair->val) may be NULL. |
|
339 |
+// selected, if not NULL is a val found in the valsAndLabels (multiple then comma delimited list). If null and anyAll not NULL, that will be selected |
|
340 |
+// anyAll, if not NULL is the string for an initial option. It can contain val and label, delimited by a comma |
|
341 |
+// extraHtml, if not NULL contains id, javascript calls and style. It does NOT contain class definitions |
|
342 |
+#define cgiMakeMultiSelectDropList(name, valsAndLabels, selected, anyAll, extraClasses, extraHtml) cgiMakeSelectDropList(TRUE, (name), (valsAndLabels), (selected), (anyAll), (extraClasses), (extraHtml)) |
|
343 |
+#define cgiMakeSingleSelectDropList(name, valsAndLabels, selected, anyAll, extraClasses, extraHtml) cgiMakeSelectDropList(FALSE,(name), (valsAndLabels), (selected), (anyAll), (extraClasses), (extraHtml)) |
|
344 |
+ |
|
290 | 345 |
void cgiMakeMultList(char *name, char *menu[], int menuSize, struct slName *checked, int length); |
291 | 346 |
/* Make a list of names which can have multiple selections. |
292 | 347 |
* Same as drop-down list except "multiple" is added to select tag */ |
... | ... |
@@ -301,7 +356,10 @@ void cgiMakeCheckboxGroupWithVals(char *name, char *menu[], char *values[], int |
301 | 356 |
/* Make a table of checkboxes that have the same variable name but different |
302 | 357 |
* values (same behavior as a multi-select input), with nice labels in menu[]. */ |
303 | 358 |
|
304 |
-void cgiMakeHiddenVar(char *varName, char *string); |
|
359 |
+void cgiMakeHiddenVarWithExtra(char *varName, char *string, char *extra); |
|
360 |
+/* Store string in hidden input for next time around. */ |
|
361 |
+ |
|
362 |
+#define cgiMakeHiddenVar(name,val) cgiMakeHiddenVarWithExtra((name),(val),NULL) |
|
305 | 363 |
/* Store string in hidden input for next time around. */ |
306 | 364 |
|
307 | 365 |
void cgiContinueHiddenVar(char *varName); |
... | ... |
@@ -414,14 +472,17 @@ void cgiResetState(); |
414 | 472 |
/* This is for reloading CGI settings multiple times in the same program |
415 | 473 |
* execution. No effect if state has not yet been initialized. */ |
416 | 474 |
|
417 |
-void commonCssStyles(); |
|
418 |
-/* Defines a few common styles to use through CSS */ |
|
475 |
+void cgiDown(float lines); |
|
476 |
+// Drop down a certain number of lines (may be fractional) |
|
477 |
+ |
|
478 |
+char *commonCssStyles(); |
|
479 |
+/* Returns a string of common CSS styles */ |
|
419 | 480 |
|
420 | 481 |
char *javaScriptLiteralEncode(char *inString); |
421 |
-/* Use backslash escaping on newline |
|
422 |
- * and quote chars, backslash and others. |
|
423 |
- * Intended that the encoded string will be |
|
424 |
- * put between quotes at a higher level and |
|
482 |
+/* Use backslash escaping on newline |
|
483 |
+ * and quote chars, backslash and others. |
|
484 |
+ * Intended that the encoded string will be |
|
485 |
+ * put between quotes at a higher level and |
|
425 | 486 |
* then interpreted by Javascript. */ |
426 | 487 |
|
427 | 488 |
#endif /* CHEAPCGI_H */ |
... | ... |
@@ -451,7 +451,7 @@ if (crt != NULL) |
451 | 451 |
} |
452 | 452 |
} |
453 | 453 |
|
454 |
-static inline int cmpTwoBits32(bits32 aHi, bits32 aLo, bits32 bHi, bits32 bLo) |
|
454 |
+inline int cmpTwoBits32(bits32 aHi, bits32 aLo, bits32 bHi, bits32 bLo) |
|
455 | 455 |
/* Return - if b is less than a , 0 if equal, else +*/ |
456 | 456 |
{ |
457 | 457 |
if (aHi < bHi) |
... | ... |
@@ -8,8 +8,9 @@ |
8 | 8 |
#include "errabort.h" |
9 | 9 |
#include "portable.h" |
10 | 10 |
#include "linefile.h" |
11 |
+#include "hash.h" |
|
11 | 12 |
|
12 |
-static char const rcsid[] = "$Id: common.c,v 1.140 2009/11/24 15:36:59 kent Exp $"; |
|
13 |
+static char const rcsid[] = "$Id: common.c,v 1.151 2010/06/02 19:06:41 tdreszer Exp $"; |
|
13 | 14 |
|
14 | 15 |
void *cloneMem(void *pt, size_t size) |
15 | 16 |
/* Allocate a new buffer of given size, and copy pt to it. */ |
... | ... |
@@ -19,7 +20,7 @@ memcpy(newPt, pt, size); |
19 | 20 |
return newPt; |
20 | 21 |
} |
21 | 22 |
|
22 |
-static char *cloneStringZExt(char *s, int size, int copySize) |
|
23 |
+static char *cloneStringZExt(const char *s, int size, int copySize) |
|
23 | 24 |
/* Make a zero terminated copy of string in memory */ |
24 | 25 |
{ |
25 | 26 |
char *d = needMem(copySize+1); |
... | ... |
@@ -29,13 +30,13 @@ d[copySize] = 0; |
29 | 30 |
return d; |
30 | 31 |
} |
31 | 32 |
|
32 |
-char *cloneStringZ(char *s, int size) |
|
33 |
+char *cloneStringZ(const char *s, int size) |
|
33 | 34 |
/* Make a zero terminated copy of string in memory */ |
34 | 35 |
{ |
35 | 36 |
return cloneStringZExt(s, strlen(s), size); |
36 | 37 |
} |
37 | 38 |
|
38 |
-char *cloneString(char *s) |
|
39 |
+char *cloneString(const char *s) |
|
39 | 40 |
/* Make copy of string in dynamic memory */ |
40 | 41 |
{ |
41 | 42 |
int size = 0; |
... | ... |
@@ -52,13 +53,16 @@ size_t size = strlen(s); |
52 | 53 |
return cloneMem(s, size+1); |
53 | 54 |
} |
54 | 55 |
|
55 |
- |
|
56 |
-/* fill a specified area of memory with zeroes */ |
|
57 |
-void zeroBytes(void *vpt, int count) |
|
56 |
+char *catTwoStrings(char *a, char *b) |
|
57 |
+/* Allocate new string that is a concatenation of two strings. */ |
|
58 | 58 |
{ |
59 |
-char *pt = (char*)vpt; |
|
60 |
-while (--count>=0) |
|
61 |
- *pt++=0; |
|
59 |
+int aLen = strlen(a), bLen = strlen(b); |
|
60 |
+int len = aLen + bLen; |
|
61 |
+char *newBuf = needLargeMem(len+1); |
|
62 |
+memcpy(newBuf, a, aLen); |
|
63 |
+memcpy(newBuf+aLen, b, bLen); |
|
64 |
+newBuf[len] = 0; |
|
65 |
+return newBuf; |
|
62 | 66 |
} |
63 | 67 |
|
64 | 68 |
/* Reverse the order of the bytes. */ |
... | ... |
@@ -149,7 +153,7 @@ for (i=0; i<length; ++i) |
149 | 153 |
/** List managing routines. */ |
150 | 154 |
|
151 | 155 |
/* Count up elements in list. */ |
152 |
-int slCount(void *list) |
|
156 |
+int slCount(const void *list) |
|
153 | 157 |
{ |
154 | 158 |
struct slList *pt = (struct slList *)list; |
155 | 159 |
int len = 0; |
... | ... |
@@ -457,6 +461,24 @@ else |
457 | 461 |
return median; |
458 | 462 |
} |
459 | 463 |
|
464 |
+void doubleBoxWhiskerCalc(int count, double *array, double *retMin, |
|
465 |
+ double *retQ1, double *retMedian, double *retQ3, double *retMax) |
|
466 |
+/* Calculate what you need to draw a box and whiskers plot from an array of doubles. */ |
|
467 |
+{ |
|
468 |
+doubleSort(count, array); |
|
469 |
+*retMin = array[0]; |
|
470 |
+*retQ1 = array[(count+2)/4]; |
|
471 |
+int halfCount = count>>1; |
|
472 |
+if ((count&1) == 1) |
|
473 |
+ *retMedian = array[halfCount]; |
|
474 |
+else |
|
475 |
+ { |
|
476 |
+ *retMedian = (array[halfCount] + array[halfCount-1]) * 0.5; |
|
477 |
+ } |
|
478 |
+*retQ3 = array[(3*count+2)/4]; |
|
479 |
+*retMax = array[count-1]; |
|
480 |
+} |
|
481 |
+ |
|
460 | 482 |
struct slDouble *slDoubleNew(double x) |
461 | 483 |
/* Return a new double. */ |
462 | 484 |
{ |
... | ... |
@@ -496,6 +518,22 @@ freeMem(array); |
496 | 518 |
return median; |
497 | 519 |
} |
498 | 520 |
|
521 |
+void slDoubleBoxWhiskerCalc(struct slDouble *list, double *retMin, |
|
522 |
+ double *retQ1, double *retMedian, double *retQ3, double *retMax) |
|
523 |
+/* Calculate what you need to draw a box and whiskers plot from a list of slDoubles. */ |
|
524 |
+{ |
|
525 |
+int i,count = slCount(list); |
|
526 |
+struct slDouble *el; |
|
527 |
+double *array; |
|
528 |
+if (count == 0) |
|
529 |
+ errAbort("Can't take do slDoubleBoxWhiskerCalc of empty list"); |
|
530 |
+AllocArray(array,count); |
|
531 |
+for (i=0, el=list; i<count; ++i, el=el->next) |
|
532 |
+ array[i] = el->val; |
|
533 |
+doubleBoxWhiskerCalc(count, array, retMin, retQ1, retMedian, retQ3, retMax); |
|
534 |
+freeMem(array); |
|
535 |
+} |
|
536 |
+ |
|
499 | 537 |
static int intCmp(const void *va, const void *vb) |
500 | 538 |
/* Compare function to sort array of ints. */ |
501 | 539 |
{ |
... | ... |
@@ -582,6 +620,17 @@ const struct slName *b = *((struct slName **)vb); |
582 | 620 |
return strcmp(a->name, b->name); |
583 | 621 |
} |
584 | 622 |
|
623 |
+int slNameCmpStringsWithEmbeddedNumbers(const void *va, const void *vb) |
|
624 |
+/* Compare strings such as gene names that may have embedded numbers, |
|
625 |
+ * so that bmp4a comes before bmp14a */ |
|
626 |
+{ |
|
627 |
+const struct slName *a = *((struct slName **)va); |
|
628 |
+const struct slName *b = *((struct slName **)vb); |
|
629 |
+return cmpStringsWithEmbeddedNumbers(a->name, b->name); |
|
630 |
+} |
|
631 |
+ |
|
632 |
+ |
|
633 |
+ |
|
585 | 634 |
void slNameSort(struct slName **pList) |
586 | 635 |
/* Sort slName list. */ |
587 | 636 |
{ |
... | ... |
@@ -702,6 +751,37 @@ slReverse(&list); |
702 | 751 |
return list; |
703 | 752 |
} |
704 | 753 |
|
754 |
+struct slName *slNameListOfUniqueWords(char *text,boolean respectQuotes) |
|
755 |
+// Return list of unique words found by parsing string delimited by whitespace. |
|
756 |
+// If respectQuotes then ["Lucy and Ricky" 'Fred and Ethyl'] will yield 2 slNames no quotes |
|
757 |
+{ |
|
758 |
+struct slName *list = NULL; |
|
759 |
+char *word = NULL; |
|
760 |
+while (text != NULL) |
|
761 |
+ { |
|
762 |
+ if (respectQuotes) |
|
763 |
+ { |
|
764 |
+ word = nextWordRespectingQuotes(&text); |
|
765 |
+ if (word != NULL) |
|
766 |
+ { |
|
767 |
+ if (word[0] == '"') |
|
768 |
+ stripChar(word, '"'); |
|
769 |
+ else if (word[0] == '\'') |
|
770 |
+ stripChar(word, '\''); |
|
771 |
+ } |
|
772 |
+ } |
|
773 |
+ else |
|
774 |
+ word = nextWord(&text); |
|
775 |
+ if (word) |
|
776 |
+ slNameStore(&list, word); |
|
777 |
+ else |
|
778 |
+ break; |
|
779 |
+ } |
|
780 |
+ |
|
781 |
+slReverse(&list); |
|
782 |
+return list; |
|
783 |
+} |
|
784 |
+ |
|
705 | 785 |
struct slName *slNameListFromStringArray(char *stringArray[], int arraySize) |
706 | 786 |
/* Return list of slNames from an array of strings of length arraySize. |
707 | 787 |
* If a string in the array is NULL, the array will be treated as |
... | ... |
@@ -765,6 +845,21 @@ slReverse(&lines); |
765 | 845 |
return lines; |
766 | 846 |
} |
767 | 847 |
|
848 |
+struct slName *slNameIntersection(struct slName *a, struct slName *b) |
|
849 |
+/* return intersection of two slName lists. */ |
|
850 |
+{ |
|
851 |
+struct hash *hashA = newHash(0); |
|
852 |
+struct slName *el, *retval = NULL; |
|
853 |
+ |
|
854 |
+for (el = a; el != NULL; el = el->next) |
|
855 |
+ hashAddInt(hashA, el->name, 1); |
|
856 |
+for (el = b; el != NULL; el = el->next) |
|
857 |
+ if(hashLookup(hashA, el->name) != NULL) |
|
858 |
+ slNameAddHead(&retval, el->name); |
|
859 |
+hashFree(&hashA); |
|
860 |
+return retval; |
|
861 |
+} |
|
862 |
+ |
|
768 | 863 |
struct slRef *refOnList(struct slRef *refList, void *val) |
769 | 864 |
/* Return ref if val is already on list, otherwise NULL. */ |
770 | 865 |
{ |
... | ... |
@@ -892,31 +987,294 @@ if (el == NULL) |
892 | 987 |
return el->val; |
893 | 988 |
} |
894 | 989 |
|
895 |
-struct slPair *slPairFromString(char *s) |
|
896 |
-/* Return slPair list parsed from list in string s |
|
897 |
- * name1=val1 name2=val2 ... |
|
898 |
- * Returns NULL if parse error */ |
|
990 |
+struct slPair *slPairListFromString(char *str,boolean respectQuotes) |
|
991 |
+// Return slPair list parsed from list in string like: [name1=val1 name2=val2 ...] |
|
992 |
+// if respectQuotes then string can have double quotes: [name1="val 1" "name 2"=val2 ...] |
|
993 |
+// resulting pair strips quotes: {name1}={val 1},{name 2}={val2} |
|
994 |
+// Returns NULL if parse error. Free this up with slPairFreeValsAndList. |
|
899 | 995 |
{ |
996 |
+char *s = skipLeadingSpaces(str); // Would like to remove this and tighten up the standard someday. |
|
997 |
+if (isEmpty(s)) |
|
998 |
+ return NULL; |
|
999 |
+ |
|
900 | 1000 |
struct slPair *list = NULL; |
901 |
-char *name; |
|
902 |
-char *ss = cloneString(s); |
|
903 |
-char *word = ss; |
|
904 |
-while((name = nextWord(&word))) |
|
905 |
- { |
|
906 |
- char *val = strchr(name,'='); |
|
907 |
- if (!val) |
|
1001 |
+char name[1024]; |
|
1002 |
+char val[1024]; |
|
1003 |
+char buf[1024]; |
|
1004 |
+bool inQuote = FALSE; |
|
1005 |
+char *b = buf; |
|
1006 |
+char sep = '='; |
|
1007 |
+char c = ' '; |
|
1008 |
+int mode = 0; |
|
1009 |
+while(1) |
|
1010 |
+ { |
|
1011 |
+ c = *s++; |
|
1012 |
+ if (mode == 0 || mode == 2) // reading name or val |
|
908 | 1013 |
{ |
909 |
- warn("missing equals sign in name=value pair: name=[%s] in string=[%s]\n", name, s); |
|
910 |
- return NULL; |
|
1014 |
+ boolean term = FALSE; |
|
1015 |
+ if (respectQuotes && b == buf && !inQuote && c == '"') |
|
1016 |
+ inQuote = TRUE; |
|
1017 |
+ else if (inQuote && c == '"') |
|
1018 |
+ term = TRUE; |
|
1019 |
+ else if ((c == sep || c == 0) && !inQuote) |
|
1020 |
+ { |
|
1021 |
+ term = TRUE; |
|
1022 |
+ --s; // rewind |
|
1023 |
+ } |
|
1024 |
+ else if (c == ' ' && !inQuote) |
|
1025 |
+ { |
|
1026 |
+ warn("slPairListFromString: Unexpected whitespace in %s", str); |
|
1027 |
+ return NULL; |
|
1028 |
+ } |
|
1029 |
+ else if (c == 0 && inQuote) |
|
1030 |
+ { |
|
1031 |
+ warn("slPairListFromString: Unterminated quote in %s", str); |
|
1032 |
+ return NULL; |
|
1033 |
+ } |
|
1034 |
+ else |
|
1035 |
+ { |
|
1036 |
+ *b++ = c; |
|
1037 |
+ if ((b - buf) > sizeof buf) |
|
1038 |
+ { |
|
1039 |
+ warn("slPairListFromString: pair name or value too long in %s", str); |
|
1040 |
+ return NULL; |
|
1041 |
+ } |
|
1042 |
+ } |
|
1043 |
+ if (term) |
|
1044 |
+ { |
|
1045 |
+ inQuote = FALSE; |
|
1046 |
+ *b = 0; |
|
1047 |
+ if (mode == 0) |
|
1048 |
+ { |
|
1049 |
+ safecpy(name, sizeof name, buf); |
|
1050 |
+ if (strlen(name)<1) |
|
1051 |
+ { |
|
1052 |
+ warn("slPairListFromString: Pair name cannot be empty in %s", str); |
|
1053 |
+ return NULL; |
|
1054 |
+ } |
|
1055 |
+ // Shall we check for name being alphanumeric, at least for the respectQuotes=FALSE case? |
|
1056 |
+ } |
|
1057 |
+ else // mode == 2 |
|
1058 |
+ { |
|
1059 |
+ safecpy(val, sizeof val, buf); |
|
1060 |
+ if (!respectQuotes && (hasWhiteSpace(name) || hasWhiteSpace(val))) // should never happen |
|
1061 |
+ { |
|
1062 |
+ warn("slPairListFromString() Unexpected white space in name=value pair: [%s]=[%s] in string=[%s]\n", name, val, str); |
|
1063 |
+ break; |
|
1064 |
+ } |
|
1065 |
+ slPairAdd(&list, name, cloneString(val)); |
|
1066 |
+ } |
|
1067 |
+ ++mode; |
|
1068 |
+ } |
|
1069 |
+ } |
|
1070 |
+ else if (mode == 1) // read required "=" sign |
|
1071 |
+ { |
|
1072 |
+ if (c != '=') |
|
1073 |
+ { |
|
1074 |
+ warn("slPairListFromString: Expected character = after name in %s", str); |
|
1075 |
+ return NULL; |
|
1076 |
+ } |
|
1077 |
+ ++mode; |
|
1078 |
+ sep = ' '; |
|
1079 |
+ b = buf; |
|
1080 |
+ } |
|
1081 |
+ else // (mode == 3) reading optional separating space |
|
1082 |
+ { |
|
1083 |
+ if (c == 0) |
|
1084 |
+ break; |
|
1085 |
+ if (c != ' ') |
|
1086 |
+ { |
|
1087 |
+ mode = 0; |
|
1088 |
+ --s; |
|
1089 |
+ b = buf; |
|
1090 |
+ sep = '='; |
|
1091 |
+ } |
|
911 | 1092 |
} |
912 |
- *val++ = 0; |
|
913 |
- slPairAdd(&list, name, cloneString(val)); |
|
914 | 1093 |
} |
915 |
-freez(&ss); |
|
916 | 1094 |
slReverse(&list); |
917 | 1095 |
return list; |
918 | 1096 |
} |
919 | 1097 |
|
1098 |
+char *slPairListToString(struct slPair *list,boolean quoteIfSpaces) |
|
1099 |
+// Returns an allocated string of pairs in form of [name1=val1 name2=val2 ...] |
|
1100 |
+// If requested, will wrap |