Browse code

csvparser working - need to adapt it to be used for matrix construction

Tom Sherman authored on 24/05/2018 22:34:36
Showing 10 changed files

... ...
@@ -1,6 +1,7 @@
1 1
 # Generated by roxygen2: do not edit by hand
2 2
 
3 3
 export(CoGAPS)
4
+export(CoGAPSFromFile)
4 5
 export(GWCoGAPS)
5 6
 export(GWCoGapsFromCheckpoint)
6 7
 export(binaryA)
... ...
@@ -128,6 +128,13 @@ CoGapsFromCheckpoint <- function(D, S, path, checkpointFile=NA)
128 128
     cogapsFromCheckpoint_cpp(D, S, path, checkpointFile)
129 129
 }
130 130
 
131
+#' CoGAPS with file input for matrix
132
+#' @export
133
+CoGAPSFromFile <- function(D)
134
+{
135
+    cogapsFromFile_cpp(D)
136
+}
137
+
131 138
 #' Display Information About Package Compilation
132 139
 #'
133 140
 #' @details displays information about how the package was compiled, i.e. which
... ...
@@ -1,6 +1,10 @@
1 1
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 2
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 3
 
4
+cogapsFromFile_cpp <- function(D) {
5
+    .Call('_CoGAPS_cogapsFromFile_cpp', PACKAGE = 'CoGAPS', D)
6
+}
7
+
4 8
 cogaps_cpp <- function(D, S, nFactor, nEquil, nEquilCool, nSample, nOutputs, nSnapshots, alphaA, alphaP, maxGibbmassA, maxGibbmassP, seed, messages, singleCellRNASeq, whichMatrixFixed, FP, checkpointInterval, cptFile, pumpThreshold, nPumpSamples, nCores) {
5 9
     .Call('_CoGAPS_cogaps_cpp', PACKAGE = 'CoGAPS', D, S, nFactor, nEquil, nEquilCool, nSample, nOutputs, nSnapshots, alphaA, alphaP, maxGibbmassA, maxGibbmassP, seed, messages, singleCellRNASeq, whichMatrixFixed, FP, checkpointInterval, cptFile, pumpThreshold, nPumpSamples, nCores)
6 10
 }
7 11
new file mode 100644
... ...
@@ -0,0 +1,12 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/CoGAPS.R
3
+\name{CoGAPSFromFile}
4
+\alias{CoGAPSFromFile}
5
+\title{CoGAPS with file input for matrix}
6
+\usage{
7
+CoGAPSFromFile(D)
8
+}
9
+\description{
10
+CoGAPS with file input for matrix
11
+}
12
+
... ...
@@ -1,9 +1,28 @@
1 1
 #include "math/SIMD.h"
2 2
 #include "GapsRunner.h"
3
+#include "file_parser/CsvParser.h"
3 4
 
4 5
 #include <Rcpp.h>
5 6
 #include <omp.h>
6 7
 
8
+// [[Rcpp::export]]
9
+Rcpp::List cogapsFromFile_cpp(const std::string D)
10
+{
11
+    CsvParser csv(D);
12
+
13
+    while (csv.hasNext())
14
+    {
15
+        MatrixElement m = csv.getNext();
16
+        Rcpp::Rcout << m.row << "," << m.col << "," << m.value << '\n';
17
+    }
18
+
19
+    for (unsigned i = 0; i < csv.mRowNames.size(); ++i)
20
+        Rcpp::Rcout << csv.mRowNames[i] << '\n';
21
+
22
+    for (unsigned i = 0; i < csv.mColNames.size(); ++i)
23
+        Rcpp::Rcout << csv.mColNames[i] << '\n';
24
+}
25
+
7 26
 // [[Rcpp::export]]
8 27
 Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix &D,
9 28
 const Rcpp::NumericMatrix &S, unsigned nFactor, unsigned nEquil,
... ...
@@ -5,6 +5,17 @@
5 5
 
6 6
 using namespace Rcpp;
7 7
 
8
+// cogapsFromFile_cpp
9
+Rcpp::List cogapsFromFile_cpp(const std::string D);
10
+RcppExport SEXP _CoGAPS_cogapsFromFile_cpp(SEXP DSEXP) {
11
+BEGIN_RCPP
12
+    Rcpp::RObject rcpp_result_gen;
13
+    Rcpp::RNGScope rcpp_rngScope_gen;
14
+    Rcpp::traits::input_parameter< const std::string >::type D(DSEXP);
15
+    rcpp_result_gen = Rcpp::wrap(cogapsFromFile_cpp(D));
16
+    return rcpp_result_gen;
17
+END_RCPP
18
+}
8 19
 // cogaps_cpp
9 20
 Rcpp::List cogaps_cpp(const Rcpp::NumericMatrix& D, const Rcpp::NumericMatrix& S, unsigned nFactor, unsigned nEquil, unsigned nEquilCool, unsigned nSample, unsigned nOutputs, unsigned nSnapshots, float alphaA, float alphaP, float maxGibbmassA, float maxGibbmassP, unsigned seed, bool messages, bool singleCellRNASeq, char whichMatrixFixed, const Rcpp::NumericMatrix& FP, unsigned checkpointInterval, const std::string& cptFile, unsigned pumpThreshold, unsigned nPumpSamples, unsigned nCores);
10 21
 RcppExport SEXP _CoGAPS_cogaps_cpp(SEXP DSEXP, SEXP SSEXP, SEXP nFactorSEXP, SEXP nEquilSEXP, SEXP nEquilCoolSEXP, SEXP nSampleSEXP, SEXP nOutputsSEXP, SEXP nSnapshotsSEXP, SEXP alphaASEXP, SEXP alphaPSEXP, SEXP maxGibbmassASEXP, SEXP maxGibbmassPSEXP, SEXP seedSEXP, SEXP messagesSEXP, SEXP singleCellRNASeqSEXP, SEXP whichMatrixFixedSEXP, SEXP FPSEXP, SEXP checkpointIntervalSEXP, SEXP cptFileSEXP, SEXP pumpThresholdSEXP, SEXP nPumpSamplesSEXP, SEXP nCoresSEXP) {
... ...
@@ -76,6 +87,7 @@ END_RCPP
76 87
 }
77 88
 
78 89
 static const R_CallMethodDef CallEntries[] = {
90
+    {"_CoGAPS_cogapsFromFile_cpp", (DL_FUNC) &_CoGAPS_cogapsFromFile_cpp, 1},
79 91
     {"_CoGAPS_cogaps_cpp", (DL_FUNC) &_CoGAPS_cogaps_cpp, 22},
80 92
     {"_CoGAPS_cogapsFromCheckpoint_cpp", (DL_FUNC) &_CoGAPS_cogapsFromCheckpoint_cpp, 7},
81 93
     {"_CoGAPS_getBuildReport_cpp", (DL_FUNC) &_CoGAPS_getBuildReport_cpp, 0},
... ...
@@ -51,9 +51,14 @@ RowMatrix::RowMatrix(const Rcpp::NumericMatrix &rmat)
51 51
 }
52 52
 
53 53
 RowMatrix::RowMatrix(const std::string &path)
54
-{
55
-
56
-}
54
+{/*
55
+    CsvParser csv(path);
56
+    while (csv.hasNext())
57
+    {
58
+        MatrixElement m = csv.getNext();
59
+        this->operator(m.row, m.col) = m.value;
60
+    }
61
+*/}
57 62
 
58 63
 void RowMatrix::operator=(const RowMatrix &mat)
59 64
 {
... ...
@@ -1,41 +1,52 @@
1 1
 #include "CsvParser.h"
2 2
 
3
+#include <iostream>
4
+
5
+// TODO need to parse by rows - otherwise it would be necessary to
6
+// know dimensions beforehand
7
+
3 8
 // open file, read column names
4
-CsvParser::CsvParser(const std::string &path)
9
+CsvParser::CsvParser(const std::string &path) : mCurrentRow(0), mCurrentCol(0)
5 10
 {
6 11
     mFile.open(path.c_str());
7 12
 
8 13
     std::string line;
9
-    std::getline(mFile, line); // read first entry (blank)
10
-    
11
-    while (!std::isdigit(mFile.peek()))
14
+    std::getline(mFile, line, ','); // read first entry (blank)
15
+
16
+    std::size_t pos;
17
+    std::getline(mFile, line, ',');
18
+    while ((pos = line.find('\n')) == std::string::npos)
12 19
     {
13
-        std::getline(mFile, line)
14 20
         mColNames.push_back(line);
21
+        std::getline(mFile, line, ',');
15 22
     }
16
-    mRowNames.push_back(mColNames.back());
17
-    mColNames.pop_back(); // read one too far
23
+    mColNames.push_back(line.substr(0,pos));
24
+    mRowNames.push_back(line.substr(pos+1));
18 25
 }
19 26
 
20
-bool CsvParser::hasNext() const
27
+bool CsvParser::hasNext()
21 28
 {
22 29
     return mFile.peek() != EOF;
23 30
 }
24 31
 
25
-MatrixElement CsvParser::getNext() const
32
+MatrixElement CsvParser::getNext()
26 33
 {
27
-    int c = mFile.peek();
28
-    
29 34
     std::string line;
30
-    std::getline(mFile, line);
31
-    if (std::isdigit(c)) // matrix element
35
+    std::getline(mFile, line, ',');
36
+
37
+    std::size_t pos;
38
+    if ((pos = line.find('\n')) != std::string::npos)
32 39
     {
33
-        Rcout << line << '\n';
34
-        return MatrixElement(0,0,0.f);
40
+        if (pos + 1 < line.size())
41
+        {
42
+            mRowNames.push_back(line.substr(pos + 1));
43
+        }
44
+        unsigned col = mCurrentCol;
45
+        mCurrentCol = 0;
46
+        return MatrixElement(mCurrentRow++, col, line.substr(0, pos));
35 47
     }
36
-    else // row name
48
+    else
37 49
     {
38
-        mRowNames.push_back(line);
39
-        return getNext();
50
+        return MatrixElement(mCurrentRow, mCurrentCol++, line);
40 51
     }
41 52
 }
42 53
\ No newline at end of file
... ...
@@ -9,7 +9,8 @@
9 9
 
10 10
 class CsvParser
11 11
 {
12
-private:
12
+//private:
13
+public:
13 14
 
14 15
     std::ifstream mFile;
15 16
 
... ...
@@ -23,8 +24,8 @@ public:
23 24
 
24 25
     CsvParser(const std::string &path);
25 26
 
26
-    bool hasNext() const;
27
-    MatrixElement getNext() const; 
27
+    bool hasNext();
28
+    MatrixElement getNext(); 
28 29
 };
29 30
 
30 31
 #endif
31 32
\ No newline at end of file
... ...
@@ -1,6 +1,9 @@
1 1
 #ifndef __COGAPS_MATRIX_ELEMENT_H__
2 2
 #define __COGAPS_MARRIX_ELEMENT_H__
3 3
 
4
+#include <sstream>
5
+#include <string>
6
+
4 7
 struct MatrixElement
5 8
 {
6 9
     unsigned row;
... ...
@@ -10,6 +13,13 @@ struct MatrixElement
10 13
     MatrixElement(unsigned r, unsigned c, float v)
11 14
         : row(r), col(c), value(v)
12 15
     {}
16
+
17
+    MatrixElement(unsigned r, unsigned c, const std::string &v)
18
+        : row(r), col(c), value(0.f)
19
+    {
20
+        std::stringstream ss(v);
21
+        ss >> value;
22
+    }
13 23
 };
14 24
 
15 25
 #endif
16 26
\ No newline at end of file