Browse code

added some data; fixed some proposal queue bugs

Tom Sherman authored on 29/05/2018 20:47:51
Showing15 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: CoGAPS
2
-Version: 3.1.1
2
+Version: 3.1.2
3 3
 Date: 2018-04-24
4 4
 Title: Coordinated Gene Activity in Pattern Sets
5 5
 Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey,
... ...
@@ -11,39 +11,8 @@
11 11
 #' data(SimpSim)
12 12
 #' plotGAPS(SimpSim.result$Amean, SimpSim.result$Pmean)
13 13
 #' @export
14
-plotGAPS <- function(A, P, outputPDF="")
14
+plotGAPS <- function(A, P)
15 15
 {
16
-    if (outputPDF != "")
17
-    {
18
-        pdf(file=paste(outputPDF, "-Patterns", ".pdf", sep=""))
19
-    }
20
-    else
21
-    {
22
-        dev.new()
23
-    }
24
-
25
-    arrayIdx <- 1:ncol(P)
26
-    maxP <- max(P)
27
-    nPatt <- dim(P)[1]
28
-    matplot(arrayIdx, t(P), type='l', lwd=3, xlim=c(1,ncol(P)), ylim=c(0,maxP),
29
-        xlab='Samples',ylab='Relative Strength',col=rainbow(nPatt))
30
-    title(main='Inferred Patterns')
31
-    legend("topright", paste("Pattern", 1:nPatt, sep = ""), pch = 1:nPatt,
32
-        lty=1,cex=0.8,col=rainbow(nPatt),bty="y",ncol=5)
33
-
34
-    if (outputPDF == "")
35
-    {
36
-        dev.new()
37
-    }
38
-    else
39
-    {
40
-        dev.off()
41
-        pdf(file=paste(outputPDF, "-Amplitude", ".pdf", sep=""))
42
-    }
43
-
16
+    plotP(P)
44 17
     heatmap(A, Rowv=NA, Colv=NA)
45
-    if (outputPDF != "")
46
-    {
47
-        dev.off()
48
-    }
49 18
 }
... ...
@@ -7,25 +7,41 @@
7 7
 #' @examples
8 8
 #' data(SimpSim)
9 9
 #' plotP(SimpSim.result$Pmean, SimpSim.result$Psd)
10
+#' @importFrom gplots plotCI
10 11
 #' @export
11
-plotP <- function(Pmean, Psd)
12
+plotP <- function(Pmean, Psd=NULL)
12 13
 {
13
-    Nfactor <- nrow(Pmean)
14
-    Nobs <- ncol(Pmean)
15
-    RowP <- 1:Nobs
16
-    colors <- rainbow(Nfactor)
17
-    ylimits <- c(0,(max(Pmean + Psd)*1.05))
14
+    colors <- rainbow(nrow(Pmean))
15
+    xlimits <- c(0, ncol(Pmean) + 1)
16
+    ylimits <- c(0, (max(Pmean) * 1.05))
18 17
 
19
-    plotCI(x=RowP, y=Pmean[1,], col=colors[1],uiw=Psd[1,],
20
-        ylim=ylimits,type='l',ylab="Relative Amplitude")
18
+    plot(NULL, xlim=xlimits, ylim=ylimits, ylab="Relative Amplitude")
21 19
 
22
-    for (i in 2:Nfactor)
20
+    for (i in 1:nrow(Pmean))
23 21
     {
24
-        points(RowP, Pmean[i,], col=colors[i], pch=i)
25
-        plotCI(RowP, y=Pmean[i,], col=colors[i], uiw=Psd[i,],
26
-            add=TRUE)
22
+        lines(x=1:ncol(Pmean), y=Pmean[i,], col=colors[i])
23
+        points(x=1:ncol(Pmean), y=Pmean[i,], col=colors[i], pch=i)
27 24
     }
25
+    
26
+    legend("bottom", paste("Pattern", 1:nrow(Pmean), sep = ""),
27
+        pch = 1:nrow(Pmean), lty=1, cex=0.8, col=colors, bty="y", ncol=5)
28 28
 
29
-    legend("bottom", paste("Pattern", 1:Nfactor, sep = ""),
30
-        pch = 1:Nfactor, lty=1,cex=0.8, col=colors,bty="y",ncol=5)
29
+    #Nfactor <- nrow(Pmean)
30
+    #Nobs <- ncol(Pmean)
31
+    #RowP <- 1:Nobs
32
+    #colors <- rainbow(Nfactor)
33
+    #ylimits <- c(0,(max(Pmean + Psd)*1.05))
34
+    #
35
+    #plotCI(x=RowP, y=Pmean[1,], col=colors[1], uiw=Psd[1,],
36
+    #    ylim=ylimits, type='l', ylab="Relative Amplitude")
37
+    #
38
+    #for (i in 2:Nfactor)
39
+    #{
40
+    #    #points(RowP, Pmean[i,], col=colors[i], pch=i)
41
+    #    plotCI(RowP, y=Pmean[i,], col=colors[i], uiw=Psd[i,],
42
+    #        add=TRUE)
43
+    #}
44
+    #
45
+    #legend("bottom", paste("Pattern", 1:Nfactor, sep = ""),
46
+    #    pch = 1:Nfactor, lty=1,cex=0.8, col=colors,bty="y",ncol=5)
31 47
 }
32 48
Binary files a/data/GIST_TS_20084.RData and b/data/GIST_TS_20084.RData differ
... ...
@@ -4,7 +4,7 @@
4 4
 \alias{plotGAPS}
5 5
 \title{Plot Decomposed A and P Matrices}
6 6
 \usage{
7
-plotGAPS(A, P, outputPDF = "")
7
+plotGAPS(A, P)
8 8
 }
9 9
 \arguments{
10 10
 \item{A}{the mean A matrix}
... ...
@@ -4,7 +4,7 @@
4 4
 \alias{plotP}
5 5
 \title{Plot the P Matrix}
6 6
 \usage{
7
-plotP(Pmean, Psd)
7
+plotP(Pmean, Psd = NULL)
8 8
 }
9 9
 \arguments{
10 10
 \item{Pmean}{matrix of mean values of P}
... ...
@@ -3,7 +3,6 @@
3 3
 #include "file_parser/CsvParser.h"
4 4
 
5 5
 #include <Rcpp.h>
6
-#include <omp.h>
7 6
 
8 7
 // [[Rcpp::export]]
9 8
 Rcpp::List cogapsFromFile_cpp(const std::string D)
... ...
@@ -12,9 +11,12 @@ Rcpp::List cogapsFromFile_cpp(const std::string D)
12 11
     //Rcpp::Rcout << csv.nRow() << "," << csv.nCol() << '\n';
13 12
 
14 13
     //RowMatrix mat(D);
15
-    RowMatrix mat(D);
16
-    Rcpp::Rcout << mat.nRow() << "," << mat.nCol() << '\n';
17
-    //Rcpp::Rcout << gaps::algo::sum(mat) << '\n';
14
+    RowMatrix rmat(D);
15
+    ColMatrix cmat(D);
16
+    Rcpp::Rcout << rmat.nRow() << "," << rmat.nCol() << '\n';
17
+    Rcpp::Rcout << cmat.nRow() << "," << cmat.nCol() << '\n';
18
+    Rcpp::Rcout << gaps::algo::sum(rmat) << '\n';
19
+    Rcpp::Rcout << gaps::algo::sum(cmat) << '\n';
18 20
 }
19 21
 
20 22
 // [[Rcpp::export]]
... ...
@@ -27,11 +29,6 @@ const Rcpp::NumericMatrix &FP, unsigned checkpointInterval,
27 29
 const std::string &cptFile, unsigned pumpThreshold, unsigned nPumpSamples,
28 30
 unsigned nCores)
29 31
 {
30
-    #ifdef __GAPS_OPENMP__
31
-        unsigned availableCores = omp_get_max_threads();
32
-        Rprintf("Running on %d out of %d available cores\n", nCores, availableCores);
33
-    #endif
34
-
35 32
     // create internal state from parameters and run from there
36 33
     GapsRunner runner(D, S, nFactor, nEquil, nEquilCool, nSample,
37 34
         nOutputs, nSnapshots, alphaA, alphaP, maxGibbmassA, maxGibbmassP, seed,
... ...
@@ -49,6 +49,14 @@ mStatistics(D.nrow(), D.ncol(), nFactor)
49 49
 // execute the steps of the algorithm, return list to R
50 50
 Rcpp::List GapsRunner::run()
51 51
 {
52
+    #ifdef __GAPS_OPENMP__
53
+        if (mPrintMessages)
54
+        {
55
+            unsigned availableCores = omp_get_max_threads();
56
+            Rprintf("Running on %d out of %d available cores\n", mNumCores, availableCores);
57
+        }
58
+    #endif
59
+
52 60
     // reset the checkpoint timer
53 61
     mStartTime = bpt_now();
54 62
     mLastCheckpoint = mStartTime;
... ...
@@ -251,6 +251,7 @@ unsigned col)
251 251
         mDomain.updateMass(pos, mass);
252 252
         mMatrix(row, col) += mass;
253 253
         impl()->updateAPMatrix(row, col, mass);
254
+        mQueue.acceptBirth();
254 255
     }
255 256
     else
256 257
     {
... ...
@@ -1,4 +1,4 @@
1
-PKG_CPPFLAGS =  -DSIMD -DGAPS_NDEBUG -msse4.2 -DBOOST_MATH_PROMOTE_DOUBLE_POLICY=0
1
+PKG_CPPFLAGS =  -DSIMD -DGAPS_NDEBUG -msse4.2 -DBOOST_MATH_PROMOTE_DOUBLE_POLICY=0 -fopenmp
2 2
 OBJECTS =   AtomicDomain.o \
3 3
             Cogaps.o \
4 4
             GapsRunner.o \
... ...
@@ -26,7 +26,15 @@ void ProposalQueue::setAlpha(float alpha)
26 26
 void ProposalQueue::populate(AtomicDomain &domain, unsigned limit)
27 27
 {
28 28
     unsigned nIter = 0;
29
-    while (nIter++ < limit && makeProposal(domain));
29
+    bool success = true;
30
+    while (nIter++ < limit && success)
31
+    {
32
+        success = makeProposal(domain);
33
+        if (!success)
34
+        {
35
+            mUseCachedRng = true;
36
+        }
37
+    }
30 38
 }
31 39
 
32 40
 // TODO efficiently allow clearing a range of proposals
... ...
@@ -48,8 +56,7 @@ unsigned ProposalQueue::size() const
48 56
 
49 57
 const AtomicProposal& ProposalQueue::operator[](int n) const
50 58
 {
51
-    return mQueue[mQueue.size() - 1 - n];
52
-    //return mQueue[n];
59
+    return mQueue[n];
53 60
 }
54 61
 
55 62
 void ProposalQueue::acceptDeath()
... ...
@@ -64,11 +71,14 @@ void ProposalQueue::rejectDeath()
64 71
     mMinAtoms++;
65 72
 }
66 73
 
67
-void ProposalQueue::rejectBirth()
74
+void ProposalQueue::acceptBirth()
68 75
 {
69 76
     #pragma omp atomic
70
-    mMinAtoms--;
77
+    mMinAtoms++;
78
+}
71 79
 
80
+void ProposalQueue::rejectBirth()
81
+{
72 82
     #pragma omp atomic
73 83
     mMaxAtoms--;
74 84
 }
... ...
@@ -96,26 +106,29 @@ bool ProposalQueue::makeProposal(AtomicDomain &domain)
96 106
     }
97 107
 
98 108
     float bdProb = mMaxAtoms < 2 ? 0.6667f : 0.5f;
99
-    float u1 = gaps::random::uniform(); // cache these values if a failure
109
+
110
+    mU1 = mUseCachedRng ? mU1 : gaps::random::uniform();
111
+    mU2 = mUseCachedRng ? mU2: gaps::random::uniform();
112
+    mUseCachedRng = false;
113
+
100 114
     float lowerBound = deathProb(mMinAtoms);
101 115
     float upperBound = deathProb(mMaxAtoms);
102 116
 
103
-    if (u1 <= bdProb)
117
+    if (mU1 <= bdProb)
104 118
     {
105
-        float u2 = gaps::random::uniform(); // happens, needed to prevent bias
106
-        if (u2 >= upperBound)
119
+        if (mU2 >= upperBound)
107 120
         {
108 121
             return birth(domain);
109 122
         }
110
-        if (u2 < lowerBound)
123
+        if (mU2 < lowerBound)
111 124
         {
112 125
             return death(domain);
113 126
         }
114 127
         return false;
115 128
     }
116
-    else if (u1 >= bdProb)
129
+    else if (mU1 >= bdProb)
117 130
     {
118
-        return (u1 < 0.75f || mMaxAtoms < 2) ? move(domain) : exchange(domain);
131
+        return (mU1 < 0.75f || mMaxAtoms < 2) ? move(domain) : exchange(domain);
119 132
     }
120 133
     return false;
121 134
 }
... ...
@@ -132,7 +145,7 @@ bool ProposalQueue::birth(AtomicDomain &domain)
132 145
     mUsedIndices.insert(pos / mDimensionSize);
133 146
     mUsedPositions.insert(pos);
134 147
     domain.insert(pos, 0.f);
135
-    ++mMinAtoms;
148
+    ++mMinAtoms; // TODO could be rejected
136 149
     ++mMaxAtoms;
137 150
     return true;
138 151
 }
... ...
@@ -50,6 +50,10 @@ private:
50 50
 
51 51
     float mAlpha;
52 52
 
53
+    bool mUseCachedRng;
54
+    float mU1;
55
+    float mU2;
56
+
53 57
     float deathProb(uint64_t nAtoms) const;
54 58
     bool birth(AtomicDomain &domain);
55 59
     bool death(AtomicDomain &domain);
... ...
@@ -61,7 +65,8 @@ private:
61 65
 public:
62 66
 
63 67
     ProposalQueue(unsigned nBins, float alpha)
64
-        : mMinAtoms(0), mMaxAtoms(0), mNumBins(nBins), mAlpha(alpha)
68
+        : mMinAtoms(0), mMaxAtoms(0), mNumBins(nBins), mAlpha(alpha),
69
+        mUseCachedRng(false), mU1(0.f), mU2(0.f)
65 70
     {}
66 71
 
67 72
     // set parameters
... ...
@@ -79,6 +84,7 @@ public:
79 84
     // update min/max atoms
80 85
     void acceptDeath();
81 86
     void rejectDeath();
87
+    void acceptBirth();
82 88
     void rejectBirth();
83 89
 
84 90
     // serialization
... ...
@@ -141,11 +141,26 @@ ColMatrix::ColMatrix(const Rcpp::NumericMatrix &rmat)
141 141
     }
142 142
 }
143 143
 
144
-// skip over bytes per row to efficiently get to columns
145
-// need to be able to read columns in parallel
146 144
 ColMatrix::ColMatrix(const std::string &path)
147 145
 {
146
+    // get matrix dimensions
147
+    MatrixDimensions dim(CsvParser::getDimensions(path));
148
+    mNumRows = dim.nRow;
149
+    mNumCols = dim.nCol;
150
+        
151
+    // allocate matrix
152
+    for (unsigned i = 0; i < mNumCols; ++i)
153
+    {
154
+        mCols.push_back(Vector(mNumRows));
155
+    }
148 156
 
157
+    // populate matrix
158
+    CsvParser csv(path);
159
+    while (csv.hasNext())
160
+    {
161
+        MatrixElement e(csv.getNext());
162
+        this->operator()(e.row, e.col) = e.value;
163
+    }
149 164
 }
150 165
 
151 166
 ColMatrix ColMatrix::operator/(float val) const
... ...
@@ -51,6 +51,8 @@ MatrixDimensions CsvParser::getDimensions(const std::string &path)
51 51
 CsvParser::CsvParser(const std::string &path) : mCurrentRow(0), mCurrentCol(0)
52 52
 {
53 53
     mFile.open(path.c_str());
54
+    std::string line;
55
+    std::getline(mFile, line); // get rid of first line (column names)
54 56
 }
55 57
 
56 58
 bool CsvParser::hasNext()
... ...
@@ -65,11 +67,13 @@ MatrixElement CsvParser::getNext()
65 67
     std::getline(mFile, line, ',');
66 68
     if ((pos = line.find('\n')) != std::string::npos) // end of line
67 69
     {
68
-        return MatrixElement(mCurrentRow, mCurrentCol, line.substr(0, pos));
70
+        unsigned col = mCurrentCol;
71
+        mCurrentCol = 0;
72
+        return MatrixElement(mCurrentRow++, col, line.substr(0, pos));
69 73
     }
70 74
     else if (std::isdigit(line[0])) // data
71 75
     {
72
-        return MatrixElement(mCurrentRow, mCurrentCol, line);
76
+        return MatrixElement(mCurrentRow, mCurrentCol++, line);
73 77
     }
74 78
     else // row/col name
75 79
     {
... ...
@@ -1,5 +1,29 @@
1 1
 #ifndef __COGAPS_TSV_PARSER_H__
2 2
 #define __COGAPS_TSV_PARSER_H__
3 3
 
4
+#include "MatrixElement.h"
5
+
6
+#include <fstream>
7
+#include <vector>
8
+#include <string>
9
+
10
+class TsvParser
11
+{
12
+private:
13
+
14
+    std::ifstream mFile;
15
+
16
+    unsigned mCurrentRow;
17
+    unsigned mCurrentCol;
18
+
19
+public:
20
+
21
+    TsvParser(const std::string &path);
22
+
23
+    bool hasNext();
24
+    MatrixElement getNext();
25
+
26
+    static MatrixDimensions getDimensions(const std::string &path);
27
+};
4 28
 
5 29
 #endif
6 30
\ No newline at end of file