Browse code

redo of csvparser interface

Tom Sherman authored on 25/05/2018 19:33:12
Showing 9 changed files

... ...
@@ -8,21 +8,13 @@
8 8
 // [[Rcpp::export]]
9 9
 Rcpp::List cogapsFromFile_cpp(const std::string D)
10 10
 {
11
-    CsvParser csv(D);
12
-/*
13
-    while (csv.hasNext())
14
-    {
15
-        MatrixElement m = csv.getNext();
16
-        Rcpp::Rcout << m.row << "," << m.col << "," << m.value << '\n';
17
-    }
11
+    // CsvParser csv(D);
12
+    //Rcpp::Rcout << csv.nRow() << "," << csv.nCol() << '\n';
18 13
 
19
-    for (unsigned i = 0; i < csv.mRowNames.size(); ++i)
20
-        Rcpp::Rcout << csv.mRowNames[i] << '\n';
21
-
22
-    for (unsigned i = 0; i < csv.mColNames.size(); ++i)
23
-        Rcpp::Rcout << csv.mColNames[i] << '\n';
24
-*/
25
-    Rcpp::Rcout << csv.nRow() << "," << csv.nCol() << '\n';
14
+    //RowMatrix mat(D);
15
+    RowMatrix mat(D);
16
+    Rcpp::Rcout << mat.nRow() << "," << mat.nCol() << '\n';
17
+    //Rcpp::Rcout << gaps::algo::sum(mat) << '\n';
26 18
 }
27 19
 
28 20
 // [[Rcpp::export]]
... ...
@@ -1,5 +1,6 @@
1 1
 #include "Matrix.h"
2 2
 #include "../file_parser/CsvParser.h"
3
+#include "../file_parser/MatrixElement.h"
3 4
 
4 5
 template<class GenericMatrix>
5 6
 static Rcpp::NumericMatrix convertToRMatrix(const GenericMatrix &mat)
... ...
@@ -53,15 +54,24 @@ RowMatrix::RowMatrix(const Rcpp::NumericMatrix &rmat)
53 54
 
54 55
 // Builds the matrix from the csv file at `path` in two passes over the file:
 // CsvParser::getDimensions() supplies the row/column counts used to allocate
 // storage, then a CsvParser streams the individual elements into place.
 RowMatrix::RowMatrix(const std::string &path)
 {
     // get matrix dimensions
     const MatrixDimension dim(CsvParser::getDimensions(path));
     mNumRows = dim.nRow;
     mNumCols = dim.nCol;

     // allocate matrix, Vector(size) fills each row with zeros
     for (unsigned i = 0; i < mNumRows; ++i)
     {
         mRows.push_back(Vector(mNumCols));
     }

     // populate matrix
     CsvParser csv(path);
     while (csv.hasNext())
     {
         const MatrixElement e(csv.getNext());

         // NOTE(review): relies on RowMatrix::operator()(row, col) returning
         // a writable reference - confirm against Matrix.h
         (*this)(e.row, e.col) = e.value;
     }
 }
66 76
 
67 77
 void RowMatrix::operator=(const RowMatrix &mat)
... ...
@@ -1,5 +1,13 @@
1 1
 #include "Vector.h"
2 2
 
3
+Vector::Vector(const std::vector<float> &v) : mValues(v.size())
4
+{
5
+    for (unsigned i = 0; i < v.size(); ++i)
6
+    {
7
+        mValues[i] = v[i];
8
+    }
9
+}
10
+
3 11
 void Vector::concat(const Vector& vec)
4 12
 {
5 13
     mValues.insert(mValues.end(), vec.mValues.begin(), vec.mValues.end());
... ...
@@ -18,7 +18,7 @@ private:
18 18
 public:
19 19
 
20 20
     Vector(unsigned size) : mValues(aligned_vector(size, 0.f)) {}
21
-    Vector(std::vector<float> v) : mValues(aligned_vector(v)) {}
21
+    Vector(const std::vector<float> &v);
22 22
 
23 23
     const float* ptr() const {return &mValues[0];}
24 24
     float* ptr() {return &mValues[0];}
... ...
@@ -1,78 +1,77 @@
1 1
 #include "CsvParser.h"
2
+#include "../math/Algorithms.h"
2 3
 
3 4
 #include <iostream>
4 5
 
5
-// read through whole file once, store row/col names - gives dimensions
6
-// open file, read column names
7
-CsvParser::CsvParser(const std::string &path) : mCurrentRow(0), mCurrentCol(0)
6
+// get the number of rows and cols in a csv file
7
+MatrixDimension CsvParser::getDimensions(const std::string &path)
8 8
 {
9
-    mFile.open(path.c_str());
9
+    // initialize struct that holds dimensions
10
+    MatrixDimension dim(0,0);
11
+
12
+    // open file stream
13
+    std::ifstream str(path);
10 14
 
15
+    // read first entry (blank)
11 16
     std::string line;
12
-    std::getline(mFile, line, ','); // read first entry (blank)
17
+    std::getline(mFile, line, ',');
13 18
 
19
+    // get col size
14 20
     std::size_t pos;
15
-    std::getline(mFile, line, ',');
16
-    while ((pos = line.find('\n')) == std::string::npos)
21
+    do
17 22
     {
18
-        mColNames.push_back(line);
19 23
         std::getline(mFile, line, ',');
24
+        dim.nCol++;
20 25
     }
21
-    mColNames.push_back(line.substr(0,pos));
26
+    while ((pos = line.find('\n')) == std::string::npos);
22 27
 
23
-    mRowNames.push_back(line.substr(pos+1));
28
+    // get row size
29
+    dim.nRow++; // acount for current row
24 30
     while (mFile.peek() != EOF)
25 31
     {
26
-        while ((pos = line.find('\n')) == std::string::npos)
32
+        // throw away data
33
+        do
27 34
         {
28 35
             std::getline(mFile, line, ',');
29 36
         }
30
-        mRowNames.push_back(line.substr(pos+1));
31
-    }
37
+        while ((pos = line.find('\n')) == std::string::npos);
32 38
 
33
-    for (unsigned i = 0; i < mRowNames.size(); ++i)
34
-    {
35
-        std::cout << mRowNames[i] << '\n';
36
-    }
37
-    for (unsigned j = 0; j < mColNames.size(); ++j)
38
-    {
39
-        std::cout << mRowNames[j] << '\n';
39
+        // increment row number, ignore last newline in file
40
+        if (pos + 1 < line.size())
41
+        {
42
+            dim.nRow++;
43
+        }
40 44
     }
45
+    return dim;
41 46
 }
42 47
 
43
-bool hasNextRow()
44
-{
45
-
46
-}
47
-
48
-std::vector<float> getNextRow()
48
+// read through whole file once, store row/col names - gives dimensions
49
+// open file, read column names
50
+CsvParser::CsvParser(const std::string &path) : mCurrentRow(0), mCurrentCol(0)
49 51
 {
50
-
52
+    mFile.open(path);
51 53
 }
52 54
 
53
-void skipNextRow()
55
+bool CsvParser::hasNext()
54 56
 {
55
-
57
+    return mFile.peek() != EOF;
56 58
 }
57 59
 
58
-/*MatrixElement CsvParser::getNext()
60
+MatrixElement CsvParser::getNext()
59 61
 {
60 62
     std::string line;
61
-    std::getline(mFile, line, ',');
62
-
63 63
     std::size_t pos;
64
-    if ((pos = line.find('\n')) != std::string::npos)
64
+    std::getline(mFile, line, ',');
65
+    if ((pos = line.find('\n')) != std::string::npos) // end of line
65 66
     {
66
-        if (pos + 1 < line.size())
67
-        {
68
-            mRowNames.push_back(line.substr(pos + 1));
69
-        }
70
-        unsigned col = mCurrentCol;
71
-        mCurrentCol = 0;
72
-        return MatrixElement(mCurrentRow++, col, line.substr(0, pos));
67
+        return MatrixElement(mCurrentRow, mCurrentCol, line.substr(0, pos));
73 68
     }
74
-    else
69
+    else if (std::isdigit(line[0])) // data
75 70
     {
76
-        return MatrixElement(mCurrentRow, mCurrentCol++, line);
71
+        return MatrixElement(mCurrentRow, mCurrentCol, line);
77 72
     }
78
-}*/
79 73
\ No newline at end of file
74
+    else // row/col name
75
+    {
76
+        return getNext();
77
+    }
78
+}
... ...
@@ -1,6 +1,8 @@
1 1
 #ifndef __COGAPS_CSV_PARSER_H__
2 2
 #define __COGAPS_CSV_PARSER_H__
3 3
 
4
+#include "MatrixElement.h"
5
+
4 6
 #include <fstream>
5 7
 #include <vector>
6 8
 #include <string>
... ...
@@ -11,27 +13,17 @@ private:
11 13
 
12 14
     std::ifstream mFile;
13 15
 
14
-    std::vector<std::string> mRowNames;
15
-    std::vector<std::string> mColNames;
16
-
17 16
     unsigned mCurrentRow;
18 17
     unsigned mCurrentCol;
19 18
 
20 19
 public:
21 20
 
22
-    // read through whole file once, store row/col names - gives dimensions
23 21
     CsvParser(const std::string &path);
24 22
 
25
-    unsigned nRow() const { return mRowNames.size(); }
26
-    unsigned nCol() const { return mColNames.size(); }
27
-
28
-    bool hasNextRow();
29
-    std::vector<float> getNextRow(); 
30
-    void skipNextRow();
23
+    bool hasNext();
24
+    MatrixElement getNext();
31 25
 
32
-    bool hasNextCol();
33
-    std::vector<float> getNextCol();
34
-    void skipNextCol();
26
+    static MatrixDimension getDimensions(const std::string &path);
35 27
 };
36 28
 
37 29
 #endif
38 30
\ No newline at end of file
39 31
new file mode 100644
... ...
@@ -0,0 +1,33 @@
1
+#ifndef __COGAPS_MATRIX_ELEMENT_H__
2
+#define __COGAPS_MATRIX_ELEMENT_H__
3
+
4
+#include <sstream>
5
+#include <string>
6
+
7
// One entry of a matrix: the position it sits at and the value stored there.
struct MatrixElement
{
    unsigned row;
    unsigned col;
    float value;

    // element whose value is already numeric
    MatrixElement(unsigned r, unsigned c, float v)
        : row(r), col(c), value(v)
    {}

    // element whose value is given as text; the text is parsed with stream
    // extraction, starting from a value of 0.f
    MatrixElement(unsigned r, unsigned c, const std::string &s)
        : row(r), col(c), value(0.f)
    {
        std::istringstream parser(s);
        parser >> value;
    }
};
24
+
25
// Size of a matrix, expressed as a row count and a column count.
struct MatrixDimension
{
    unsigned nRow;
    unsigned nCol;

    // nr: number of rows, nc: number of columns
    MatrixDimension(unsigned nr, unsigned nc)
        : nRow(nr), nCol(nc)
    {}
};
32
+
33
+#endif
0 34
\ No newline at end of file
... ...
@@ -3,6 +3,16 @@
3 3
 #include "SIMD.h"
4 4
 
5 5
 #include <algorithm>
6
+#include <string>
7
+#include <sstream>
8
+
9
+float gaps::algo::stringToFloat(const std::string &s)
10
+{
11
+    std::stringstream ss(s);
12
+    float f;
13
+    ss >> f;
14
+    return f;
15
+}
6 16
 
7 17
 float gaps::algo::sum(const Vector &vec)
8 18
 {
... ...
@@ -29,6 +29,8 @@ namespace algo
29 29
     const float epsilon = 1.0e-10f;
30 30
     const float pi = 3.14159265358979323846264f;
31 31
 
32
+    float stringToFloat(const std::string &s);
33
+
32 34
     bool isVectorZero(const float *vec, unsigned size);
33 35
 
34 36
     // vector algorithms