Browse code

fixed NA's in std dev matrix

Tom Sherman authored on 06/08/2018 17:57:31
Showing 7 changed files

1 1
Binary files a/data/GIST.RData and b/data/GIST.RData differ
2 2
Binary files a/data/SimpSim.RData and b/data/SimpSim.RData differ
... ...
@@ -27,7 +27,7 @@ const PatternGibbsSampler &PSampler)
27 27
 
28 28
         Vector prod(ASampler.mMatrix.getCol(j) * norm);
29 29
         mAMeanMatrix.getCol(j) += prod;
30
-        mAStdMatrix.getCol(j) += gaps::algo::elementSq(prod); 
30
+        mAStdMatrix.getCol(j) += gaps::algo::elementSq(prod); // precision loss? use double?
31 31
     }
32 32
 }
33 33
 
... ...
@@ -525,20 +525,20 @@ float m2, unsigned r1, unsigned c1, unsigned r2, unsigned c2)
525 525
         float pUpper = gaps::random::p_gamma(m1 + m2, 2.f, 1.f / mLambda);
526 526
         float newMass = gaps::random::inverseGammaSample(0.f, pUpper, 2.f, 1.f / mLambda);
527 527
 
528
-        // swapping only effects alpha parameters - only effects gibbs
529
-        // flips the sign of alpha parameters (only su)
530
-        // flips sign in gibbs mass
531
-        // can we swap after gibbsMass calculation?
532
-        if ((m1 > m2 && m1 > newMass) || (m2 > m1 && m2 < newMass))
533
-        {
534
-            std::swap(r1, r2);
535
-            std::swap(c1, c2);
536
-            std::swap(p1, p2);
537
-            std::swap(m1, m2);
538
-        }
539
-
540 528
         if (impl()->canUseGibbs(r1, c1, r2, c2))
541 529
         {
530
+            // swapping only effects alpha parameters - only effects gibbs
531
+            // flips the sign of alpha parameters (only su)
532
+            // flips sign in gibbs mass
533
+            // can we swap after gibbsMass calculation?
534
+            if ((m1 > m2 && m1 > newMass) || (m2 > m1 && m2 < newMass))
535
+            {
536
+                std::swap(r1, r2);
537
+                std::swap(c1, c2);
538
+                std::swap(p1, p2);
539
+                std::swap(m1, m2);
540
+            }
541
+
542 542
             AlphaParameters alpha = impl()->alphaParameters(r1, c1, r2, c2);
543 543
             std::pair<float, bool> gMass = gibbsMass(alpha, m1, m2);
544 544
             if (gMass.second)
... ...
@@ -549,9 +549,9 @@ float m2, unsigned r1, unsigned c1, unsigned r2, unsigned c2)
549 549
             }
550 550
         }
551 551
 
552
-        // use metropolis hastings otherwise
553 552
         float delta = m1 > m2 ? newMass - m1 : m2 - newMass; // change larger mass
554
-        float pOldMass = m1 + delta > m2 - delta ? m1 : m2;
553
+        float pOldMass = 2 * newMass > m1 + m2 ? gaps::max(m1, m2) : gaps::min(m1, m2);
554
+    
555 555
         float pNew = gaps::random::d_gamma(newMass, 2.f, 1.f / mLambda);
556 556
         float pOld = gaps::random::d_gamma(pOldMass, 2.f, 1.f / mLambda);
557 557
 
... ...
@@ -120,14 +120,15 @@ template<class GenericMatrix>
120 120
 GenericMatrix gaps::algo::computeStdDev(const GenericMatrix &stdMat,
121 121
 const GenericMatrix &meanMat, unsigned nUpdates)
122 122
 {
123
+    GAPS_ASSERT(nUpdates > 1);
123 124
     GenericMatrix retMat(stdMat.nRow(), stdMat.nCol());
124
-    float meanTerm = 0.f;
125 125
     for (unsigned r = 0; r < retMat.nRow(); ++r)
126 126
     {
127 127
         for (unsigned c = 0; c < retMat.nCol(); ++c)
128 128
         {
129
-            meanTerm = meanMat(r,c) * meanMat(r,c) / static_cast<float>(nUpdates);
130
-            retMat(r,c) = std::sqrt((stdMat(r,c) - meanTerm) / (static_cast<float>(nUpdates) - 1.f));
129
+            float meanTerm = meanMat(r,c) * meanMat(r,c) / static_cast<float>(nUpdates);
130
+            float numer = gaps::max(0.f, stdMat(r,c) - meanTerm);
131
+            retMat(r,c) = std::sqrt(numer / (static_cast<float>(nUpdates) - 1.f));
131 132
         }
132 133
     }
133 134
     return retMat;
... ...
@@ -105,8 +105,6 @@ public:
105 105
     void load(const float *ptr) { mData = LOAD_PACKED(ptr); }
106 106
     void store(float *ptr) { STORE_PACKED(ptr, mData); }
107 107
 
108
-// RTTI may be expensive
109
-
110 108
 #if defined( __GAPS_AVX__ )
111 109
     float scalar()
112 110
     {
... ...
@@ -1,5 +1,13 @@
1 1
 context("CoGAPS")
2 2
 
3
+no_na_in_result <- function(gapsResult)
4
+{
5
+    sum(is.na(gapsResult@featureLoadings)) +
6
+        sum(is.na(gapsResult@featureStdDev)) +
7
+        sum(is.na(gapsResult@sampleFactors)) +
8
+        sum(is.na(gapsResult@sampleStdDev)) == 0
9
+}
10
+
3 11
 test_that("Valid Top-Level CoGAPS Calls",
4 12
 {
5 13
     data(GIST)
... ...
@@ -19,6 +27,7 @@ test_that("Valid Top-Level CoGAPS Calls",
19 27
     res[[3]] <- CoGAPS(gistCsvPath, nIterations=100, outputFrequency=50, seed=1)
20 28
     res[[4]] <- CoGAPS(gistTsvPath, nIterations=100, outputFrequency=50, seed=1)
21 29
     res[[5]] <- CoGAPS(gistMtxPath, nIterations=100, outputFrequency=50, seed=1)
30
+    expect_true(all(sapply(res, no_na_in_result)))
22 31
     
23 32
     expect_equal(nrow(res[[1]]@featureLoadings), 1363)
24 33
     expect_equal(ncol(res[[1]]@featureLoadings), 7)
... ...
@@ -41,6 +50,7 @@ test_that("Valid Top-Level CoGAPS Calls",
41 50
         outputFrequency=50, seed=1)
42 51
     res[[5]] <- CoGAPS(gistMtxPath, transposeData=TRUE, nIterations=100,
43 52
         outputFrequency=50, seed=1)
53
+    expect_true(all(sapply(res, no_na_in_result)))
44 54
     
45 55
     expect_equal(nrow(res[[1]]@featureLoadings), 9)
46 56
     expect_equal(ncol(res[[1]]@featureLoadings), 7)
... ...
@@ -52,30 +62,42 @@ test_that("Valid Top-Level CoGAPS Calls",
52 62
         res[[i]]@sampleFactors == res[[i+1]]@sampleFactors)))
53 63
 
54 64
     # passing uncertainty
55
-    expect_error(CoGAPS(testDataFrame, uncertainty=as.matrix(GIST.uncertainty),
65
+    expect_error(res <- CoGAPS(testDataFrame, uncertainty=as.matrix(GIST.uncertainty),
56 66
         nIterations=100, outputFrequency=50, seed=1), NA)    
67
+    expect_true(no_na_in_result(res))
57 68
 
58 69
     # multiple threads
59
-    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
60
-        seed=1, nThreads=2), NA)
61
-    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
62
-        seed=1, nThreads=6), NA)
63
-    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
64
-        seed=1, nThreads=12), NA)
70
+    expect_error(res <- CoGAPS(testDataFrame, nIterations=100,
71
+        outputFrequency=50, seed=1, nThreads=2), NA)
72
+    expect_true(no_na_in_result(res))
73
+
74
+    expect_error(res <- CoGAPS(testDataFrame, nIterations=100,
75
+        outputFrequency=50, seed=1, nThreads=6), NA)
76
+    expect_true(no_na_in_result(res))
77
+
78
+    expect_error(res <- CoGAPS(testDataFrame, nIterations=100,
79
+        outputFrequency=50, seed=1, nThreads=12), NA)
80
+    expect_true(no_na_in_result(res))
65 81
 
66 82
     # genome-wide CoGAPS 
67
-    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
68
-        seed=1, distributed="genome-wide"), NA)
69
-    expect_error(CoGAPS(gistTsvPath, nIterations=100, outputFrequency=50,
70
-        seed=1, distributed="genome-wide"), NA)
83
+    expect_error(res <- CoGAPS(testDataFrame, nIterations=100,
84
+        outputFrequency=50, seed=1, distributed="genome-wide"), NA)
85
+    expect_true(no_na_in_result(res))
86
+
87
+    expect_error(res <- CoGAPS(gistTsvPath, nIterations=100,
88
+        outputFrequency=50, seed=1, distributed="genome-wide"), NA)
89
+    expect_true(no_na_in_result(res))
71 90
 
72 91
     # single-cell CoGAPS
73
-    expect_error(CoGAPS(testDataFrame, nIterations=100, outputFrequency=50,
74
-        seed=1, distributed="single-cell", singleCell=TRUE,
92
+    expect_error(res <- CoGAPS(testDataFrame, nIterations=100,
93
+        outputFrequency=50, seed=1, distributed="single-cell", singleCell=TRUE,
75 94
         transposeData=TRUE), NA)
76
-    expect_error(CoGAPS(gistTsvPath, nIterations=100, outputFrequency=50,
77
-        seed=1, distributed="single-cell", singleCell=TRUE,
95
+    expect_true(no_na_in_result(res))
96
+
97
+    expect_error(res <- CoGAPS(gistTsvPath, nIterations=100,
98
+        outputFrequency=50, seed=1, distributed="single-cell", singleCell=TRUE,
78 99
         transposeData=TRUE), NA)
100
+    expect_true(no_na_in_result(res))
79 101
 })
80 102
 
81 103
 #test_that("Invalid Top-Level CoGAPS Calls",