Browse code

csv parser progress

Tom Sherman authored on 25/05/2018 14:03:32
Showing 7 changed files

... ...
@@ -1,5 +1,5 @@
1 1
 Package: CoGAPS
2
-Version: 3.1.0
2
+Version: 3.1.1
3 3
 Date: 2018-04-24
4 4
 Title: Coordinated Gene Activity in Pattern Sets
5 5
 Author: Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey,
... ...
@@ -9,7 +9,7 @@
9 9
 Rcpp::List cogapsFromFile_cpp(const std::string D)
10 10
 {
11 11
     CsvParser csv(D);
12
-
12
+/*
13 13
     while (csv.hasNext())
14 14
     {
15 15
         MatrixElement m = csv.getNext();
... ...
@@ -21,6 +21,8 @@ Rcpp::List cogapsFromFile_cpp(const std::string D)
21 21
 
22 22
     for (unsigned i = 0; i < csv.mColNames.size(); ++i)
23 23
         Rcpp::Rcout << csv.mColNames[i] << '\n';
24
+*/
25
+    Rcpp::Rcout << csv.nRow() << "," << csv.nCol() << '\n';
24 26
 }
25 27
 
26 28
 // [[Rcpp::export]]
... ...
@@ -1,4 +1,5 @@
1 1
 #include "Matrix.h"
2
+#include "../file_parser/CsvParser.h"
2 3
 
3 4
 template<class GenericMatrix>
4 5
 static Rcpp::NumericMatrix convertToRMatrix(const GenericMatrix &mat)
... ...
@@ -51,14 +52,17 @@ RowMatrix::RowMatrix(const Rcpp::NumericMatrix &rmat)
51 52
 }
52 53
 
53 54
 RowMatrix::RowMatrix(const std::string &path)
54
-{/*
55
+{
55 56
     CsvParser csv(path);
56
-    while (csv.hasNext())
57
+
58
+    while (csv.hasNextRow())
57 59
     {
58
-        MatrixElement m = csv.getNext();
59
-        this->operator(m.row, m.col) = m.value;
60
+        mRows.push_back(Vector(csv.getNextRow()));
60 61
     }
61
-*/}
62
+
63
+    mNumRows = mRows.size();
64
+    mNumCols = mRows[0].size();
65
+}
62 66
 
63 67
 void RowMatrix::operator=(const RowMatrix &mat)
64 68
 {
... ...
@@ -127,6 +131,8 @@ ColMatrix::ColMatrix(const Rcpp::NumericMatrix &rmat)
127 131
     }
128 132
 }
129 133
 
134
+// skip over bytes per row to efficiently get to columns
135
+// need to be able to read columns in parallel
130 136
 ColMatrix::ColMatrix(const std::string &path)
131 137
 {
132 138
 
... ...
@@ -18,6 +18,7 @@ private:
18 18
 public:
19 19
 
20 20
     Vector(unsigned size) : mValues(aligned_vector(size, 0.f)) {}
21
+    Vector(std::vector<float> v) : mValues(aligned_vector(v)) {}
21 22
 
22 23
     const float* ptr() const {return &mValues[0];}
23 24
     float* ptr() {return &mValues[0];}
... ...
@@ -2,9 +2,7 @@
2 2
 
3 3
 #include <iostream>
4 4
 
5
-// TODO need to parse by rows - otherwise it would be necessary to
6
-// know dimensions beforehand
7
-
5
+// read through whole file once, store row/col names - gives dimensions
8 6
 // open file, read column names
9 7
 CsvParser::CsvParser(const std::string &path) : mCurrentRow(0), mCurrentCol(0)
10 8
 {
... ...
@@ -21,15 +19,43 @@ CsvParser::CsvParser(const std::string &path) : mCurrentRow(0), mCurrentCol(0)
21 19
         std::getline(mFile, line, ',');
22 20
     }
23 21
     mColNames.push_back(line.substr(0,pos));
22
+
24 23
     mRowNames.push_back(line.substr(pos+1));
24
+    while (mFile.peek() != EOF)
25
+    {
26
+        while ((pos = line.find('\n')) == std::string::npos)
27
+        {
28
+            std::getline(mFile, line, ',');
29
+        }
30
+        mRowNames.push_back(line.substr(pos+1));
31
+    }
32
+
33
+    for (unsigned i = 0; i < mRowNames.size(); ++i)
34
+    {
35
+        std::cout << mRowNames[i] << '\n';
36
+    }
37
+    for (unsigned j = 0; j < mColNames.size(); ++j)
38
+    {
39
+        std::cout << mRowNames[j] << '\n';
40
+    }
41
+}
42
+
43
+bool hasNextRow()
44
+{
45
+
25 46
 }
26 47
 
27
-bool CsvParser::hasNext()
48
+std::vector<float> getNextRow()
28 49
 {
29
-    return mFile.peek() != EOF;
50
+
51
+}
52
+
53
+void skipNextRow()
54
+{
55
+
30 56
 }
31 57
 
32
-MatrixElement CsvParser::getNext()
58
+/*MatrixElement CsvParser::getNext()
33 59
 {
34 60
     std::string line;
35 61
     std::getline(mFile, line, ',');
... ...
@@ -49,4 +75,4 @@ MatrixElement CsvParser::getNext()
49 75
     {
50 76
         return MatrixElement(mCurrentRow, mCurrentCol++, line);
51 77
     }
52
-}
53 78
\ No newline at end of file
79
+}*/
54 80
\ No newline at end of file
... ...
@@ -1,16 +1,13 @@
1 1
 #ifndef __COGAPS_CSV_PARSER_H__
2 2
 #define __COGAPS_CSV_PARSER_H__
3 3
 
4
-#include "MatrixElement.h"
5
-
6 4
 #include <fstream>
7 5
 #include <vector>
8 6
 #include <string>
9 7
 
10 8
 class CsvParser
11 9
 {
12
-//private:
13
-public:
10
+private:
14 11
 
15 12
     std::ifstream mFile;
16 13
 
... ...
@@ -22,10 +19,19 @@ public:
22 19
 
23 20
 public:
24 21
 
22
+    // read through whole file once, store row/col names - gives dimensions
25 23
     CsvParser(const std::string &path);
26 24
 
27
-    bool hasNext();
28
-    MatrixElement getNext(); 
25
+    unsigned nRow() const { return mRowNames.size(); }
26
+    unsigned nCol() const { return mColNames.size(); }
27
+
28
+    bool hasNextRow();
29
+    std::vector<float> getNextRow(); 
30
+    void skipNextRow();
31
+
32
+    bool hasNextCol();
33
+    std::vector<float> getNextCol();
34
+    void skipNextCol();
29 35
 };
30 36
 
31 37
 #endif
32 38
\ No newline at end of file
33 39
deleted file mode 100644
... ...
@@ -1,25 +0,0 @@
1
-#ifndef __COGAPS_MATRIX_ELEMENT_H__
2
-#define __COGAPS_MARRIX_ELEMENT_H__
3
-
4
-#include <sstream>
5
-#include <string>
6
-
7
-struct MatrixElement
8
-{
9
-    unsigned row;
10
-    unsigned col;
11
-    float value;
12
-
13
-    MatrixElement(unsigned r, unsigned c, float v)
14
-        : row(r), col(c), value(v)
15
-    {}
16
-
17
-    MatrixElement(unsigned r, unsigned c, const std::string &v)
18
-        : row(r), col(c), value(0.f)
19
-    {
20
-        std::stringstream ss(v);
21
-        ss >> value;
22
-    }
23
-};
24
-
25
-#endif
26 0
\ No newline at end of file