e9f9e80b |
////////////////////////////////////////////////////////////////
|
a48b6567 |
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
|
e9f9e80b |
// it under the terms of the GNU Lesser General Public License
// (version 2.1) as published by the Free Software Foundation.
//
|
a48b6567 |
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
|
e9f9e80b |
//
|
a48b6567 |
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
|
e9f9e80b |
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
a48b6567 |
//
|
e9f9e80b |
////////////////////////////////////////////////////////////////
|
a48b6567 |
#ifndef _DataSet_HEADER_
#define _DataSet_HEADER_
|
7610fa82 |
#ifdef _MSC_VER
|
a48b6567 |
#include <windows.h>
#endif
|
a5a28055 |
#include "calvin_files/data/src/DataException.h"
#include "calvin_files/data/src/DataSetHeader.h"
#include "calvin_files/portability/src/AffymetrixBaseTypes.h"
#include "calvin_files/utils/src/AffyStlCollectionTypes.h"
//
#include <cstring>
#include <fstream>
#include <string>
//
|
a48b6567 |
namespace affymetrix_calvin_io
{
// forward declare
class GenericData;
/*! This class provides methods to access the data of a DataSet. */
class DataSet
{
public:
|
030d93e1 |
/*! Constructor. Use this constructor do access the data using memory-mapping.
* On Windows, memory-mapping will be restricted to 200MB view of the DataSet data.
|
a48b6567 |
* @param fileName The name of the generic file to access.
* @param header The DataSetHeader of the DataSet to access.
* @param handle A handle to the file mapping object
|
030d93e1 |
* @param loadEntireDataSetHint Indicate if DataSet will attempt to read the entire DataSet data into a memory buffer.
|
a48b6567 |
*/
|
030d93e1 |
DataSet(const std::string& fileName, const affymetrix_calvin_io::DataSetHeader& header, void* handle, bool loadEntireDataSetHint=false);
/*! Constructor. Use this constructor do access the data using std::ifstream.
* With fstream access the entire DataSet data will be read into memory.
* @param fileName The name of the generic file to access.
* @param header The DataSetHeader of the DataSet to access.
* @param ifs A reference to an open ifstream.
* @param loadEntireDataSetHint Indicate if DataSet will attempt to read the entire DataSet data into a memory buffer.
*/
DataSet(const std::string& fileName, const affymetrix_calvin_io::DataSetHeader& header, std::ifstream& ifs, bool loadEntireDataSetHint=false);
|
a48b6567 |
public:
/*! Method to release memory held by this object. Closes object before deleting. */
void Delete();
/*! Method to open the DataSet to access the data.
* @return true if successful
*/
bool Open();
/*! Method to close the DataSet. */
void Close();
/*! Method to get a reference to the DataSetHeader
* @return A reference to the DataSetHeader.
*/
const affymetrix_calvin_io::DataSetHeader& Header() { return header; }
/*! Return the number of rows in the DataSet. */
int32_t Rows() { return header.GetRowCnt(); }
/*! Return the number of columns in the DataSet. */
int32_t Cols() { return header.GetColumnCnt(); }
/*! Determines if the DataSet is open
* @return true if the DataSet is open
*/
|
030d93e1 |
bool IsOpen() { return (isOpen); }
|
a48b6567 |
/*! Provides access to single data elements
* @param row Row index.
* @param col Column index.
* @param value Reference to the data type to fill with the data.
* @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
*/
void GetData(int32_t row, int32_t col, u_int8_t& value);
void GetData(int32_t row, int32_t col, int8_t& value);
void GetData(int32_t row, int32_t col, u_int16_t& value);
void GetData(int32_t row, int32_t col, int16_t& value);
void GetData(int32_t row, int32_t col, u_int32_t& value);
void GetData(int32_t row, int32_t col, int32_t& value);
void GetData(int32_t row, int32_t col, float& value);
void GetData(int32_t row, int32_t col, std::string& value);
void GetData(int32_t row, int32_t col, std::wstring& value);
/*! Provides access to multiple data elements in the same column.
* If count elements could not be read, it is not considered an error. The vector
* is filled with only the data that could be read.
* @param col Column index.
* @param startRow Row index of the data to be inserted into the vector at [0].
* @param count Number of elements to retrieve. -1 indicates to read all
|
030d93e1 |
* @param values Reference to the data type to fill with the data.
|
a48b6567 |
* @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
*/
void GetData(int32_t col, int32_t startRow, int32_t count, Uint8Vector& values);
void GetData(int32_t col, int32_t startRow, int32_t count, Int8Vector& values);
void GetData(int32_t col, int32_t startRow, int32_t count, Uint16Vector& values);
void GetData(int32_t col, int32_t startRow, int32_t count, Int16Vector& values);
void GetData(int32_t col, int32_t startRow, int32_t count, Uint32Vector& values);
void GetData(int32_t col, int32_t startRow, int32_t count, Int32Vector& values);
void GetData(int32_t col, int32_t startRow, int32_t count, FloatVector& values);
void GetData(int32_t col, int32_t startRow, int32_t count, std::vector<std::string>& values);
void GetData(int32_t col, int32_t startRow, int32_t count, WStringVector& values);
/*! Provides access to multiple data elements in the same column.
* The caller is responsible for allocating the storage to which count element values can be written.
* If count elements could not be read, it is not considered an error. The array
* is filled with only the data that could be read.
* @param col Column index.
* @param startRow Row index of the data to be inserted into the vector at [0].
* @param count Number of elements to retrieve. -1 indicates to read all
|
030d93e1 |
* @param values Reference to the data type to fill with the data.
|
a48b6567 |
* @return Number of elements read.
* @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
*/
int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int8_t* values);
int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, int8_t* values);
int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int16_t* values);
int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, int16_t* values);
int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int32_t* values);
int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, int32_t* values);
int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, float* values);
int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::string* values);
int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::wstring* values);
/*! Check that the requested data matches the type of data in the column and that row and column are in bounds.
* @param row Row index to check.
* @param col Column index to check.
* @param type Column type to check.
* @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
* @exception affymetrix_calvin_exceptions::ColumnIndexOutOfBoundsException The column index is out-of-bounds.
* @exception affymetrix_calvin_exceptions::RowIndexOutOfBoundsException The row index is out-of-bounds.
* @exception affymetrix_calvin_exceptions::UnexpectedColumnTypeException The column type does not match the type requested.
*/
void CheckRowColumnAndType(int32_t row, int32_t col, affymetrix_calvin_io::DataSetColumnTypes type);
//protected:
/*! Return the bytes per row.
* @return Bytes in a row.
*/
int32_t BytesPerRow() { return columnByteOffsets[header.GetColumnCnt()]; }
|
158e02d3 |
//protected:
|
a48b6567 |
/*! Destructor. */
~DataSet();
|
158e02d3 |
protected:
|
030d93e1 |
/*! Open the DataSet using memory-mapping
* @return True if the DataSet was successully mapped.
*/
bool OpenMM();
/*! Read the DataSet data into a buffer using ifstream::read.
*/
void ReadDataSetUsingStream();
|
a48b6567 |
/*! Close the memory mapped file. */
void UnmapFile();
|
030d93e1 |
/*! Delete the DataSet data read in by ifstream::read */
void ClearStreamData();
|
a48b6567 |
|
030d93e1 |
/*! Returns the address of a data element given a row and column. Ensures that data from rowStart
* to rowCount+rowStart are mapped unless that is larger than the mapped window.
* @param rowStart Row index
|
a48b6567 |
* @param col Column index
|
030d93e1 |
* @param rowCount The number of rows to ensure are mapped starting at rowStart
* @return Pointer to the data element at rowStart
* @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not open.
|
4bc22e97 |
* @exception affymetrix_calvin_exceptions::DataSetRemapException There was an error during a remap.
|
a48b6567 |
*/
|
030d93e1 |
char* FilePosition(int32_t rowStart, int32_t col, int32_t rowCount=1);
|
a48b6567 |
/*! Returns the address of a data element given a row and column. Ensures that data from rowStart
|
030d93e1 |
* to rowCount+rowStart are copied from the file into a memory buffer. The memory buffer will
* remain intact until the next call to LoadDataAndReturnFilePosition.
|
a48b6567 |
* @param rowStart Row index
* @param col Column index
* @param rowCount The number of rows to ensure are mapped starting at rowStart
* @return Pointer to the data element at rowStart
|
030d93e1 |
* @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not open.
|
a48b6567 |
*/
|
030d93e1 |
char* LoadDataAndReturnFilePosition(int32_t rowStart, int32_t col, int32_t rowCount);
|
a48b6567 |
/*! Updates the columnByteOffsets member. */
void UpdateColumnByteOffsets();
/*! Computes the index of the row after last to read.
* @param startRow Row index of the data to be inserted into the vector at [0].
* @param count Number of elements to be retrieved. -1 indicates read to the last element.
* @return Index of row after the last row to read.
*/
int32_t ComputeEndRow(int32_t startRow, int32_t count);
/*! Clears and resizes the vector
* @param values Reference to a vector to clear and resize.
* @param size Target size of the vector
*/
template<typename T> void ClearAndSizeVector(std::vector<T>& values, u_int32_t size);
|
e9f9e80b |
/*! Template method to get data into a vector
* @param col Column index.
* @param startRow Row index of the data to be inserted into the vector at [0].
* @param count Number of elements to retrieve. -1 indicates to read all
* @param values Reference to the data type to fill with the data.
* @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
*/
template<typename T> void GetDataT(int32_t col, int32_t startRow, int32_t count, T& values);
/*! Template method to get data into an array
* @param col Column index.
* @param startRow Row index of the data to be inserted into the vector at [0].
* @param count Number of elements to retrieve. -1 indicates to read all
* @param values Reference to the data type to fill with the data.
* @return Number of elements read.
* @exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
*/
template<typename T> int32_t GetDataRawT(int32_t col, int32_t startRow, int32_t count, T* values);
/*! Returns the index of the last row mapped.
* @return Index of the last row mapped.
*/
int32_t LastRowMapped();
|
a48b6567 |
/*! Platform specific memory-mapping method */
|
7610fa82 |
#ifdef _MSC_VER
|
030d93e1 |
|
a48b6567 |
bool MapDataWin32(u_int32_t start, u_int32_t bytes);
|
030d93e1 |
|
a48b6567 |
#else
|
030d93e1 |
|
a48b6567 |
bool MapDataPosix(u_int32_t start, u_int32_t bytes);
|
030d93e1 |
|
a48b6567 |
#endif
|
e9f9e80b |
/*! Reads from the instr pointer into the vector at the index indicated.
* @param index Index to the vector where to write the value.
* @param values The vector into which to write the value.
* @param instr A pointer to the data in the memory buffer. The pointer is advanced by the method.
*/
void AssignValue(int32_t index, Uint8Vector& values, char*& instr);
void AssignValue(int32_t index, Int8Vector& values, char*& instr);
void AssignValue(int32_t index, Uint16Vector& values, char*& instr);
void AssignValue(int32_t index, Int16Vector& values, char*& instr);
void AssignValue(int32_t index, Uint32Vector& values, char*& instr);
void AssignValue(int32_t index, Int32Vector& values, char*& instr);
void AssignValue(int32_t index, FloatVector& values, char*& instr);
void AssignValue(int32_t index, StringVector& values, char*& instr);
void AssignValue(int32_t index, WStringVector& values, char*& instr);
void AssignValue(int32_t index, u_int8_t* values, char*& instr);
void AssignValue(int32_t index, int8_t* values, char*& instr);
void AssignValue(int32_t index, u_int16_t* values, char*& instr);
void AssignValue(int32_t index, int16_t* values, char*& instr);
void AssignValue(int32_t index, u_int32_t* values, char*& instr);
void AssignValue(int32_t index, int32_t* values, char*& instr);
void AssignValue(int32_t index, float* values, char*& instr);
void AssignValue(int32_t index, std::string* values, char*& instr);
void AssignValue(int32_t index, std::wstring* values, char*& instr);
|
a48b6567 |
protected:
|
be3ad048 |
/*! name of the file containing the data data set*. */
|
a48b6567 |
std::string fileName;
/*! copy of the DataSetHeader */
affymetrix_calvin_io::DataSetHeader header;
/*! pointer to the mapped data, doesn't account for allocation granularity. */
void* mappedData;
|
030d93e1 |
/*! pointer to the data. In memory-mapping mode, the pointer has been adjusted for the allocation granularity. */
|
a48b6567 |
char* data;
/*! Array of column byte offsets. Updated when the file is opened.
* There are columns + 1 elements
*/
|
030d93e1 |
Int32Vector columnByteOffsets;
|
a48b6567 |
|
7610fa82 |
#ifdef _MSC_VER
|
030d93e1 |
|
a48b6567 |
/*! Handle returned by CreateFileMapping */
HANDLE fileMapHandle;
/*! Maximum size of the view to map */
static const u_int32_t MaxViewSize = 200*1024*1024; // 200MB
|
030d93e1 |
|
a48b6567 |
#else
|
030d93e1 |
|
a48b6567 |
FILE *fp;
|
030d93e1 |
|
a48b6567 |
#endif
|
030d93e1 |
/*! Indicates if the DataSet is open*/
bool isOpen;
|
a48b6567 |
/*! Byte offset to the start of the view */
u_int32_t mapStart;
/*! Number of bytes mapped to the view */
u_int32_t mapLen;
|
030d93e1 |
/*! A flag the indicates the data access mode. True = access the data using memory-mapping. False = access the data using std::ifstream */
bool useMemoryMapping;
/*! An open ifstream object */
std::ifstream* fileStream;
/*! Indicates whether to attempt to read all data into a memory buffer. */
bool loadEntireDataSetHint;
|
a48b6567 |
};
}
|
030d93e1 |
#endif // _DataSet_HEADER_
|