src/fusion/calvin_files/data/src/DataSet.h
e9f9e80b
 ////////////////////////////////////////////////////////////////
a48b6567
 //
 // Copyright (C) 2005 Affymetrix, Inc.
 //
 // This library is free software; you can redistribute it and/or modify
e9f9e80b
 // it under the terms of the GNU Lesser General Public License 
 // (version 2.1) as published by the Free Software Foundation.
 // 
a48b6567
 // This library is distributed in the hope that it will be useful, but
 // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 // for more details.
e9f9e80b
 // 
a48b6567
 // You should have received a copy of the GNU Lesser General Public License
 // along with this library; if not, write to the Free Software Foundation, Inc.,
e9f9e80b
 // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
a48b6567
 //
e9f9e80b
 ////////////////////////////////////////////////////////////////
 
a48b6567
 
 #ifndef _DataSet_HEADER_
 #define _DataSet_HEADER_
 
7610fa82
 #ifdef _MSC_VER
a48b6567
 #include <windows.h>
 #endif
 
a5a28055
 #include "calvin_files/data/src/DataException.h"
 #include "calvin_files/data/src/DataSetHeader.h"
 #include "calvin_files/portability/src/AffymetrixBaseTypes.h"
 #include "calvin_files/utils/src/AffyStlCollectionTypes.h"
 //
 #include <cstring>
 #include <fstream>
 #include <string>
 //
 
 
a48b6567
 namespace affymetrix_calvin_io
 {
 
 // forward declare
 class GenericData;
 
 /*! This class provides methods to access the data of a DataSet. */
 class DataSet
 {
 public:
030d93e1
 	/*! Constructor. Use this constructor do access the data using memory-mapping.
 	 *	On Windows, memory-mapping will be restricted to 200MB view of the DataSet data.
a48b6567
 	 *	@param fileName The name of the generic file to access.
 	 *	@param header The DataSetHeader of the DataSet to access.
 	 *	@param handle A handle to the file mapping object
030d93e1
 	 *	@param loadEntireDataSetHint Indicate if DataSet will attempt to read the entire DataSet data into a memory buffer.
a48b6567
 	 */
030d93e1
 	DataSet(const std::string& fileName, const affymetrix_calvin_io::DataSetHeader& header, void* handle, bool loadEntireDataSetHint=false);
 
 
 	/*! Constructor. Use this constructor do access the data using std::ifstream.
 	 *	With fstream access the entire DataSet data will be read into memory.
 	 *	@param fileName The name of the generic file to access.
 	 *	@param header The DataSetHeader of the DataSet to access.
 	 *	@param ifs A reference to an open ifstream.
 	 *	@param loadEntireDataSetHint Indicate if DataSet will attempt to read the entire DataSet data into a memory buffer.
 	 */
 	DataSet(const std::string& fileName, const affymetrix_calvin_io::DataSetHeader& header, std::ifstream& ifs, bool loadEntireDataSetHint=false);
a48b6567
 
 public:
 	/*! Method to release memory held by this object.  Closes object before deleting. */
 	void Delete();
 
 	/*! Method to open the DataSet to access the data.
 	 *	@return true if successful
 	 */
 	bool Open();
 
 	/*! Method to close the DataSet. */
 	void Close();
 
 	/*! Method to get a reference to the DataSetHeader
 	 *	@return A reference to the DataSetHeader.
 	 */
 	const affymetrix_calvin_io::DataSetHeader& Header() { return header; }
 
 	/*! Return the number of rows in the DataSet. */
 	int32_t Rows() { return header.GetRowCnt(); }
 
 	/*! Return the number of columns in the DataSet. */
 	int32_t Cols() { return header.GetColumnCnt(); }
 
 	/*! Determines if the DataSet is open
 	 *	@return true if the DataSet is open
 	 */
030d93e1
 	bool IsOpen() { return (isOpen); }
a48b6567
 
 	/*! Provides access to single data elements
 	 *	@param row Row index.
 	 *	@param col Column index.
 	 *	@param value Reference to the data type to fill with the data.
 	 *	@exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
 	 */
 	void GetData(int32_t row, int32_t col, u_int8_t& value);
 
 	void GetData(int32_t row, int32_t col, int8_t& value);
 
 	void GetData(int32_t row, int32_t col, u_int16_t& value);
 
 	void GetData(int32_t row, int32_t col, int16_t& value);
 
 	void GetData(int32_t row, int32_t col, u_int32_t& value);
 
 	void GetData(int32_t row, int32_t col, int32_t& value);
 
 	void GetData(int32_t row, int32_t col, float& value);
 
 	void GetData(int32_t row, int32_t col, std::string& value);
 
 	void GetData(int32_t row, int32_t col, std::wstring& value);
 
 	/*! Provides access to multiple data elements in the same column.
 	 *	If count elements could not be read, it is not considered an error.  The vector
 	 *	is filled with only the data that could be read.
 	 *	@param col Column index.
 	 *	@param startRow Row index of the data to be inserted into the vector at [0].
 	 *	@param count Number of elements to retrieve. -1 indicates to read all
030d93e1
 	 *	@param values Reference to the data type to fill with the data.
a48b6567
 	 *	@exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
 	 */
 
 	void GetData(int32_t col, int32_t startRow, int32_t count, Uint8Vector& values);
 
 	void GetData(int32_t col, int32_t startRow, int32_t count, Int8Vector& values);
 
 	void GetData(int32_t col, int32_t startRow, int32_t count, Uint16Vector& values);
 
 	void GetData(int32_t col, int32_t startRow, int32_t count, Int16Vector& values);
 
 	void GetData(int32_t col, int32_t startRow, int32_t count, Uint32Vector& values);
 
 	void GetData(int32_t col, int32_t startRow, int32_t count, Int32Vector& values);
 
 	void GetData(int32_t col, int32_t startRow, int32_t count, FloatVector& values);
 
 	void GetData(int32_t col, int32_t startRow, int32_t count, std::vector<std::string>& values);
 
 	void GetData(int32_t col, int32_t startRow, int32_t count, WStringVector& values);
 
 	/*! Provides access to multiple data elements in the same column.
 	 *	The caller is responsible for allocating the storage to which count element values can be written.
 	 *	If count elements could not be read, it is not considered an error.  The array
 	 *	is filled with only the data that could be read.
 	 *	@param col Column index.
 	 *	@param startRow Row index of the data to be inserted into the vector at [0].
 	 *	@param count Number of elements to retrieve. -1 indicates to read all
030d93e1
 	 *	@param values Reference to the data type to fill with the data.
a48b6567
 	 *	@return Number of elements read.
 	 *	@exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
 	*/
 
 	int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int8_t* values);
 
 	int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, int8_t* values);
 
 	int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int16_t* values);
 
 	int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, int16_t* values);
 
 	int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int32_t* values);
 
 	int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, int32_t* values);
 
 	int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, float* values);
 
 	int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::string* values);
 
 	int32_t GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::wstring* values);
 
 	/*! Check that the requested data matches the type of data in the column and that row and column are in bounds.
 	 *	@param row Row index to check.
 	 *	@param col Column index to check.
 	 *	@param type Column type to check.
 	 *	@exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
 	 *	@exception affymetrix_calvin_exceptions::ColumnIndexOutOfBoundsException The column index is out-of-bounds.
 	 *	@exception affymetrix_calvin_exceptions::RowIndexOutOfBoundsException The row index is out-of-bounds.
 	 *	@exception affymetrix_calvin_exceptions::UnexpectedColumnTypeException The column type does not match the type requested.
 	 */
 	void CheckRowColumnAndType(int32_t row, int32_t col, affymetrix_calvin_io::DataSetColumnTypes type);
 
 //protected:
 
 	/*! Return the bytes per row.
 	 *	@return Bytes in a row.
 	 */
 	int32_t BytesPerRow() { return columnByteOffsets[header.GetColumnCnt()]; }
 
158e02d3
 //protected:
a48b6567
 	/*! Destructor. */
 	~DataSet();
 
158e02d3
 protected:
030d93e1
 	/*! Open the DataSet using memory-mapping
 	 *	@return True if the DataSet was successully mapped.
 	 */
 	bool OpenMM();
 
 	/*! Read the DataSet data into a buffer using ifstream::read.
 	 */
 	void ReadDataSetUsingStream();
a48b6567
 
 	/*! Close the memory mapped file. */
 	void UnmapFile();
 
030d93e1
 	/*! Delete the DataSet data read in by ifstream::read */
 	void ClearStreamData();
a48b6567
 
030d93e1
 	/*! Returns the address of a data element given a row and column.  Ensures that data from rowStart
 	 *	to rowCount+rowStart are mapped unless that is larger than the mapped window.
 	 *	@param rowStart Row index
a48b6567
 	 *	@param col Column index
030d93e1
 	 *	@param rowCount The number of rows to ensure are mapped starting at rowStart
 	 *	@return Pointer to the data element at rowStart
 	 *	@exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not open.
4bc22e97
 	 *	@exception affymetrix_calvin_exceptions::DataSetRemapException There was an error during a remap.
a48b6567
 	 */
030d93e1
 	char* FilePosition(int32_t rowStart, int32_t col, int32_t rowCount=1);
a48b6567
 
 	/*! Returns the address of a data element given a row and column.  Ensures that data from rowStart
030d93e1
 	 *	to rowCount+rowStart are copied from the file into a memory buffer.  The memory buffer will
 	 *	remain intact until the next call to LoadDataAndReturnFilePosition.
a48b6567
 	 *	@param rowStart Row index
 	 *	@param col Column index
 	 *	@param rowCount The number of rows to ensure are mapped starting at rowStart
 	 *	@return Pointer to the data element at rowStart
030d93e1
 	 *	@exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not open.
a48b6567
 	 */
030d93e1
 	char* LoadDataAndReturnFilePosition(int32_t rowStart, int32_t col, int32_t rowCount);
 
a48b6567
 	/*! Updates the columnByteOffsets member. */
 	void UpdateColumnByteOffsets();
 
 	/*! Computes the index of the row after last to read.
 	 *	@param startRow Row index of the data to be inserted into the vector at [0].
 	 *	@param count Number of elements to be retrieved. -1 indicates read to the last element.
 	 *	@return Index of row after the last row to read.
 	 */
 	int32_t ComputeEndRow(int32_t startRow, int32_t count);
 
 	/*!	Clears and resizes the vector
 	 *	@param values Reference to a vector to clear and resize.
 	 *	@param size Target size of the vector
 	 */
 	template<typename T> void ClearAndSizeVector(std::vector<T>& values, u_int32_t size);
 
e9f9e80b
 	/*! Template method to get data into a vector
 	 *	@param col Column index.
 	 *	@param startRow Row index of the data to be inserted into the vector at [0].
 	 *	@param count Number of elements to retrieve. -1 indicates to read all
 	 *	@param values Reference to the data type to fill with the data.
 	 *	@exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
 	 */
 	template<typename T> void GetDataT(int32_t col, int32_t startRow, int32_t count, T& values);
 
 	/*! Template method to get data into an array
 	 *	@param col Column index.
 	 *	@param startRow Row index of the data to be inserted into the vector at [0].
 	 *	@param count Number of elements to retrieve. -1 indicates to read all
 	 *	@param values Reference to the data type to fill with the data.
 	 *	@return Number of elements read.
 	 *	@exception affymetrix_calvin_exceptions::DataSetNotOpenException The file is not memory-mapped.
 	 */
 	template<typename T> int32_t GetDataRawT(int32_t col, int32_t startRow, int32_t count, T* values);
 
 
 	/*! Returns the index of the last row mapped.
 	 *	@return Index of the last row mapped.
 	 */
 	int32_t LastRowMapped();
 
a48b6567
 	/*! Platform specific memory-mapping method */
7610fa82
 #ifdef _MSC_VER
030d93e1
 
a48b6567
 	bool MapDataWin32(u_int32_t start, u_int32_t bytes);
030d93e1
 
a48b6567
 #else
030d93e1
 
a48b6567
 	bool MapDataPosix(u_int32_t start, u_int32_t bytes);
030d93e1
 
a48b6567
 #endif
 
e9f9e80b
 	/*! Reads from the instr pointer into the vector at the index indicated.
 	 *	@param index Index to the vector where to write the value.
 	 *	@param values The vector into which to write the value.
 	 *	@param instr A pointer to the data in the memory buffer.  The pointer is advanced by the method.
 	 */
 	void AssignValue(int32_t index, Uint8Vector& values, char*& instr);
 
 	void AssignValue(int32_t index, Int8Vector& values, char*& instr);
 
 	void AssignValue(int32_t index, Uint16Vector& values, char*& instr);
 
 	void AssignValue(int32_t index, Int16Vector& values, char*& instr);
 
 	void AssignValue(int32_t index, Uint32Vector& values, char*& instr);
 
 	void AssignValue(int32_t index, Int32Vector& values, char*& instr);
 
 	void AssignValue(int32_t index, FloatVector& values, char*& instr);
 
 	void AssignValue(int32_t index, StringVector& values, char*& instr);
 
 	void AssignValue(int32_t index, WStringVector& values, char*& instr);
 
 	void AssignValue(int32_t index, u_int8_t* values, char*& instr);
 
 	void AssignValue(int32_t index, int8_t* values, char*& instr);
 
 	void AssignValue(int32_t index, u_int16_t* values, char*& instr);
 
 	void AssignValue(int32_t index, int16_t* values, char*& instr);
 
 	void AssignValue(int32_t index, u_int32_t* values, char*& instr);
 
 	void AssignValue(int32_t index, int32_t* values, char*& instr);
 
 	void AssignValue(int32_t index, float* values, char*& instr);
 
 	void AssignValue(int32_t index, std::string* values, char*& instr);
 
 	void AssignValue(int32_t index, std::wstring* values, char*& instr);
 
a48b6567
 protected:
be3ad048
 	/*! name of the file containing the data data set*.  */
a48b6567
 	std::string fileName;
 
 	/*! copy of the DataSetHeader */
 	affymetrix_calvin_io::DataSetHeader header;
 
 	/*! pointer to the mapped data, doesn't account for allocation granularity. */
 	void*	mappedData;
 
030d93e1
 	/*! pointer to the data.  In memory-mapping mode, the pointer has been adjusted for the allocation granularity. */
a48b6567
 	char*	data;
 
 	/*! Array of column byte offsets.  Updated when the file is opened.
 	 *  There are columns + 1 elements
 	 */
030d93e1
 	Int32Vector columnByteOffsets;
a48b6567
 
7610fa82
 #ifdef _MSC_VER
030d93e1
 
a48b6567
 	/*! Handle returned by CreateFileMapping */
 	HANDLE fileMapHandle;
 	/*! Maximum size of the view to map */
 	static const u_int32_t MaxViewSize = 200*1024*1024;	// 200MB
030d93e1
 
a48b6567
 #else
030d93e1
 
a48b6567
 	FILE *fp;
030d93e1
 
a48b6567
 #endif
 
030d93e1
 	/*! Indicates if the DataSet is open*/
 	bool isOpen;
a48b6567
 	/*! Byte offset to the start of the view */
 	u_int32_t mapStart;
 	/*! Number of bytes mapped to the view */
 	u_int32_t mapLen;
030d93e1
 	/*! A flag the indicates the data access mode.  True = access the data using memory-mapping.  False = access the data using std::ifstream */
 	bool useMemoryMapping;
 	/*! An open ifstream object */
 	std::ifstream* fileStream;
 	/*! Indicates whether to attempt to read all data into a memory buffer. */
 	bool loadEntireDataSetHint;
a48b6567
 
 };
 
 }
 
030d93e1
 #endif // _DataSet_HEADER_