////////////////////////////////////////////////////////////////
//
// Copyright (C) 2005 Affymetrix, Inc.
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License 
// (version 2.1) as published by the Free Software Foundation.
// 
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
// or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
// for more details.
// 
// You should have received a copy of the GNU Lesser General Public License
// along with this library; if not, write to the Free Software Foundation, Inc.,
// 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
//
////////////////////////////////////////////////////////////////


#include "calvin_files/data/src/DataSet.h"
//
#include "calvin_files/data/src/GenericData.h"
#include "calvin_files/parsers/src/FileInput.h"
//
#include "util/Fs.h"
//
#include <sys/stat.h>
#include <sys/types.h>
//

using namespace affymetrix_calvin_io;

#ifndef _MSC_VER
#include <unistd.h>
#include <sys/mman.h>

#ifndef PAGE_SIZE
/// Page size used for memory mapping in non Windows environment
#define PAGE_SIZE (getpagesize())
#endif
#ifndef PAGE_MASK
/// Page mask used for memory mapping in non Windows environment
#define PAGE_MASK ~(PAGE_SIZE-1)
#endif
#ifndef PAGE_TRUNC
/// Page truncation pointer used for memory mapping in non Windows environment
#define PAGE_TRUNC(ptr) (ptr&(PAGE_MASK))
#endif
#endif

/*
 * Initialize the object to use memory-mapping to access the file.
 */
DataSet::DataSet(const std::string& fileName_, const DataSetHeader& header_, void* handle, bool loadEntireDataSetHint_)
{
	fileName = fileName_;
	header = header_;

	mappedData = 0;
	data = 0;
	isOpen = false;

#ifdef _MSC_VER
	fileMapHandle = handle;
#else
	fp = 0;
#endif
	mapStart = 0;
	mapLen = 0;
	fileStream = 0;
	useMemoryMapping = true;
	loadEntireDataSetHint = loadEntireDataSetHint_;
}

/*
 * Initialize the object to use std::ifstream to access the file.
 */
DataSet::DataSet(const std::string& fileName_, const affymetrix_calvin_io::DataSetHeader& header_, std::ifstream& ifs, bool loadEntireDataSetHint_)
{
	fileName = fileName_;
	header = header_;

	mappedData = 0;
	data = 0;
	isOpen = false;

#ifdef _MSC_VER
	fileMapHandle = 0;
#else
	fp = 0;
#endif
	mapStart = 0;
	mapLen = 0;
	fileStream = &ifs;
	useMemoryMapping = false;
	loadEntireDataSetHint = loadEntireDataSetHint_;
}

/*

 * Clean up.
 */
DataSet::~DataSet()
{
	Close();
}

/*
 * Informs the object to delete itself
 */
void DataSet::Delete()
{
	Close();
	delete this;
}

/*
 * Open the DataSet for reading
 */
bool DataSet::Open()
{
	UpdateColumnByteOffsets();

	if (useMemoryMapping)
		isOpen = OpenMM();
	else
	{
		ReadDataSetUsingStream();
		isOpen = true;
	}
	return isOpen;
}

/*
 * Open the file using memory-mapping
 */
bool DataSet::OpenMM()
{
#ifdef _MSC_VER
	if (MapDataWin32(header.GetDataStartFilePos(), header.GetDataSize()) == false)
		return false;
#else
	// Open the file
	fp = fopen(fileName.c_str(), "r");
	if (fp == NULL)
	{
		return false;
	}

	if (MapDataPosix(header.GetDataStartFilePos(), header.GetDataSize()) == false)
		return false;
#endif

	return true;
}

/*
 * Reads the DataSet data from the file into a memory buffer.
 */
void DataSet::ReadDataSetUsingStream()
{
	if(loadEntireDataSetHint == false)
		return;

	mapLen = header.GetDataSize();
	mapStart = header.GetDataStartFilePos();

	data = new char[mapLen];
	fileStream->seekg(mapStart);
	fileStream->read(data, mapLen);
}

/*
 * Close the DataSet
 */
void DataSet::Close()
{
	if (useMemoryMapping)
		UnmapFile();
	else
		ClearStreamData();
}

#ifdef _MSC_VER

std::string GetErrorMsg()
{
	LPVOID lpMsgBuf;
	if (!FormatMessage( 
			FORMAT_MESSAGE_ALLOCATE_BUFFER | 
			FORMAT_MESSAGE_FROM_SYSTEM | 
			FORMAT_MESSAGE_IGNORE_INSERTS,
			NULL,
			GetLastError(),
			MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language
			(LPTSTR) &lpMsgBuf,
			0,
			NULL ))
	{
		// Handle the error.
		return "";
	}

	std::string message = (char*)lpMsgBuf;

	// Free the buffer.
	LocalFree( lpMsgBuf );

	return message;
}

/*
 * Map the data on Win32
 */
bool DataSet::MapDataWin32(u_int32_t start, u_int32_t bytes)
{
	mapStart = start;

	if (bytes > MaxViewSize)
	{
		bytes = MaxViewSize;	// limit the amount of data mapped
	}

	SYSTEM_INFO sysinfo;
	GetSystemInfo (&sysinfo);
	u_int64_t qwFileOffset = u_int64_t(start);
	DWORD dwOffset = DWORD(qwFileOffset % sysinfo.dwAllocationGranularity);
	qwFileOffset = (qwFileOffset / sysinfo.dwAllocationGranularity) *
								sysinfo.dwAllocationGranularity;
	DWORD dwOffsetHigh = DWORD(qwFileOffset >> 32);
	DWORD dwOffsetLow  = DWORD(qwFileOffset & 0xFFFFFFFF);
	DWORD dwBytesToMap = bytes + dwOffset;

	if (mappedData != 0)
	{
		UnmapViewOfFile (mappedData);
	}
	mappedData = MapViewOfFile(fileMapHandle, FILE_MAP_READ, dwOffsetHigh, dwOffsetLow, dwBytesToMap);
	if (mappedData == 0)
	{
		std::string msg = GetErrorMsg();
		data = 0;
		fileMapHandle = NULL;
		mapStart = 0;
		return false;
	}
	mapLen = bytes;
	data = (char *)mappedData + dwOffset;
	return true;
}

#else

/*
 * Map the data on Linux
 */
bool DataSet::MapDataPosix(u_int32_t start, u_int32_t bytes)
{
	mapStart = start;
	if (fp == NULL)
		return false;

	u_int32_t page_start = PAGE_TRUNC(start);
	u_int32_t page_offset = start - page_start;
	mapLen = bytes + page_offset;

	// Get the file size
	if (Fs::fileExists(fileName))
	{
          int64_t fileLen = Fs::fileSize(fileName);
		if (fileLen < page_start + mapLen)
			mapLen = fileLen - page_start;
	}

	// Map the file.
	mappedData = mmap(NULL, mapLen, PROT_READ, MAP_SHARED, fileno(fp), page_start);
	if (mappedData == MAP_FAILED)
	{
		Close();
		return false;
	}
	else
	{
		data = ((char *)mappedData) + page_offset;
	}
	return true;
}
#endif

/*
 * Close the memory-map
 */
void DataSet::UnmapFile()
{
#ifdef _MSC_VER

	// Unmap the view
	if (mappedData != 0 )
	{
		UnmapViewOfFile(mappedData);
		mappedData = 0;
	}
	fileMapHandle = NULL;
	data = 0;
	mapStart = 0;
	mapLen = 0;

#else
	if (fp != NULL)
	{
		if (mappedData)
		{
			munmap(mappedData, mapLen);
			mapLen = 0;
			mappedData = 0;
		}
		fclose(fp);
		fp = NULL;
	}
#endif
}

/*
 * Delete the buffer
 */
void DataSet::ClearStreamData()
{
	delete[] data;
	data = 0;
	mapStart = 0;
	mapLen = 0;
}

/*
 * Check the row, column and expected column type
 */
void DataSet::CheckRowColumnAndType(int32_t row, int32_t col, DataSetColumnTypes type)
{
	if (isOpen == false)
	{
		affymetrix_calvin_exceptions::DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0);
		throw e;
	}

	if (col < 0 || col >= header.GetColumnCnt())
	{
		affymetrix_calvin_exceptions::ColumnIndexOutOfBoundsException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0);
		throw e;
	}

	if (row < 0 || row >= header.GetRowCnt())
	{
		affymetrix_calvin_exceptions::RowIndexOutOfBoundsException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0);
		throw e;
	}

	// Check if the data type is expected

	if (header.GetColumnInfo(col).GetColumnType() != type)
	{
		affymetrix_calvin_exceptions::UnexpectedColumnTypeException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0);
		throw e;
	}
}

/*
 * Detemine the address of data given row and col.  Ensure all requested data is mapped
 */
char* DataSet::FilePosition(int32_t rowStart, int32_t col, int32_t rowCount)
{
	if (isOpen == false)
	{
		affymetrix_calvin_exceptions::DataSetNotOpenException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0);
		throw e;
	}

	// Limit row count
	if (rowCount > header.GetRowCnt())
		rowCount= header.GetRowCnt();

	if (useMemoryMapping == false && loadEntireDataSetHint == false)
	{
		return LoadDataAndReturnFilePosition(rowStart, col, rowCount);
	}

	// Byte offset in data set + byte offset of data set in file
	u_int32_t startByte = BytesPerRow()*rowStart + columnByteOffsets[col] + header.GetDataStartFilePos();

#ifdef _MSC_VER

	if (useMemoryMapping)
	{
		// Byte offset in data set + byte offset of data set in file
		u_int32_t endByte = BytesPerRow()*(rowStart+rowCount-1) + columnByteOffsets[col+1] + header.GetDataStartFilePos();	// as long as col is in bounds this is safe.

		// Remap the file if necessary
		if (startByte < mapStart || endByte > mapStart+mapLen)
		{
			if (startByte < mapStart)	// moving backwards through the data, attempt to find an optimum startByte.
			{
				u_int32_t reverseStartByte = 0;
				if (endByte > MaxViewSize)
					reverseStartByte = endByte - MaxViewSize;

				// Don't go above the DataSet data
				if (reverseStartByte < header.GetDataStartFilePos())
					reverseStartByte = header.GetDataStartFilePos();

				if (MapDataWin32(reverseStartByte, header.GetDataStartFilePos() + header.GetDataSize() - reverseStartByte) == false)
				{
					affymetrix_calvin_exceptions::DataSetRemapException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0);
					throw e;
				}
			}
			else	// forward
			{
				if (MapDataWin32(startByte, header.GetDataStartFilePos() + header.GetDataSize() - startByte) == false)
				{
					affymetrix_calvin_exceptions::DataSetRemapException e(L"Calvin",L"Default Description, Please Update!",affymetrix_calvin_utilities::DateTime::GetCurrentDateTime().ToString(),std::string(__FILE__),(u_int16_t)__LINE__,0);
					throw e;
				}
			}
		}
	}
#endif

	char*	filePosition = data + (startByte-mapStart);
	return filePosition;
}

/*
 *
 */
char* DataSet::LoadDataAndReturnFilePosition(int32_t rowStart, int32_t col, int32_t rowCount)
{
	// Delete the previous data
	ClearStreamData();

	mapLen = BytesPerRow()*rowCount;
	mapStart = BytesPerRow()*rowStart + columnByteOffsets[col] + header.GetDataStartFilePos();

	data = new char[mapLen];
	fileStream->seekg(mapStart);
	fileStream->read(data, mapLen);
	return data;
}

/*
 * Update the column sizes
 */
void DataSet::UpdateColumnByteOffsets()
{
	columnByteOffsets.clear();

	int32_t accum = 0;
	int32_t cols = header.GetColumnCnt();
	for (int32_t col = 0; col < cols; ++col)
	{
		columnByteOffsets.push_back(accum);
		accum += header.GetColumnInfo(col).GetSize();
	}
	columnByteOffsets.push_back(accum);
}

void DataSet::GetData(int32_t row, int32_t col, u_int8_t& value)
{
	// Get the data
	char* instr = FilePosition(row, col);
	value = FileInput::ReadUInt8(instr);
}

void DataSet::GetData(int32_t row, int32_t col, int8_t& value)
{
	// Get the data
	char* instr = FilePosition(row, col);
	value = FileInput::ReadInt8(instr);
}

void DataSet::GetData(int32_t row, int32_t col, u_int16_t& value)
{
	// Get the data
	char* instr = FilePosition(row, col);
	value = FileInput::ReadUInt16(instr);
}

void DataSet::GetData(int32_t row, int32_t col, int16_t& value)
{
	// Get the data
	char* instr = FilePosition(row, col);
	value = FileInput::ReadInt16(instr);
}

void DataSet::GetData(int32_t row, int32_t col, u_int32_t& value)
{
	// Get the data
	char* instr = FilePosition(row, col);
	value = FileInput::ReadUInt32(instr);
}

void DataSet::GetData(int32_t row, int32_t col, int32_t& value)
{
	// Get the data
	char* instr = FilePosition(row, col);
	value = FileInput::ReadInt32(instr);
}

void DataSet::GetData(int32_t row, int32_t col, float& value)
{
	// Get the data
	char* instr = FilePosition(row, col);
	value = FileInput::ReadFloat(instr);
}

void DataSet::GetData(int32_t row, int32_t col, std::string& value)
{
	// Get the data
	char* instr = FilePosition(row, col);
	value = FileInput::ReadString8(instr);
}

void DataSet::GetData(int32_t row, int32_t col, std::wstring& value)
{
	// Get the data
	char* instr = FilePosition(row, col);
	value = FileInput::ReadString16(instr);
}


int32_t DataSet::ComputeEndRow(int32_t startRow, int32_t count)
{
	int32_t rows = startRow + count;
	if (count == -1 || (rows > header.GetRowCnt()))
		rows = header.GetRowCnt();

	return rows;
}

template<typename T> void DataSet::ClearAndSizeVector(std::vector<T>& values, u_int32_t size)
{
	values.clear();
	values.resize(size);
}

template<typename T> void DataSet::GetDataT(int32_t col, int32_t startRow, int32_t count, T& values)
{
	int32_t endRow = ComputeEndRow(startRow, count);
	ClearAndSizeVector(values, endRow-startRow);

	if (header.GetColumnCnt() > 1)
	{
		for (int32_t row = startRow; row < endRow; ++row)
		{
			// Get the data
			char* instr = FilePosition(row, col);
			AssignValue(row-startRow, values, instr);
		}
	}
	else
	{
		char* instr = FilePosition(startRow, col, count);
		int32_t recomputePositionRow = LastRowMapped();

		for (int32_t row = startRow; row < endRow; ++row)
		{
			if (row > recomputePositionRow)
			{
				instr = FilePosition(row, col, count-row);
				recomputePositionRow = LastRowMapped();
			}
			AssignValue(row-startRow, values, instr);
		}
	}
}

void DataSet::AssignValue(int32_t index, Uint8Vector& values, char*& instr)
{
	values[index] = FileInput::ReadUInt8(instr);
}

void DataSet::AssignValue(int32_t index, Int8Vector& values, char*& instr)
{
	values[index] = FileInput::ReadInt8(instr);
}

void DataSet::AssignValue(int32_t index, Uint16Vector& values, char*& instr)
{
	values[index] = FileInput::ReadUInt16(instr);
}

void DataSet::AssignValue(int32_t index, Int16Vector& values, char*& instr)
{
	values[index] = FileInput::ReadInt16(instr);
}

void DataSet::AssignValue(int32_t index, Uint32Vector& values, char*& instr)
{
	values[index] = FileInput::ReadUInt32(instr);
}

void DataSet::AssignValue(int32_t index, Int32Vector& values, char*& instr)
{
	values[index] = FileInput::ReadInt32(instr);
}

void DataSet::AssignValue(int32_t index, FloatVector& values, char*& instr)
{
	values[index] = FileInput::ReadFloat(instr);
}

void DataSet::AssignValue(int32_t index, StringVector& values, char*& instr)
{
	values[index] = FileInput::ReadString8(instr);
}

void DataSet::AssignValue(int32_t index, WStringVector& values, char*& instr)
{
	values[index] = FileInput::ReadString16(instr);
}

void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Uint8Vector& values)
{
	GetDataT(col, startRow, count, values);
}

void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Int8Vector& values)
{
	GetDataT(col, startRow, count, values);
}

void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Uint16Vector& values)
{
	GetDataT(col, startRow, count, values);
}

void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Int16Vector& values)
{
	GetDataT(col, startRow, count, values);
}

void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Uint32Vector& values)
{
	GetDataT(col, startRow, count, values);
}

void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, Int32Vector& values)
{
	GetDataT(col, startRow, count, values);
}

void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, FloatVector& values)
{
	GetDataT(col, startRow, count, values);
}

void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, std::vector<std::string>& values)
{
	GetDataT(col, startRow, count, values);
}

void DataSet::GetData(int32_t col, int32_t startRow, int32_t count, WStringVector& values)
{
	GetDataT(col, startRow, count, values);
}

template<typename T> int32_t DataSet::GetDataRawT(int32_t col, int32_t startRow, int32_t count, T* values)
{
	int32_t endRow = ComputeEndRow(startRow, count);

	if (header.GetColumnCnt() > 1)
	{
		for (int32_t row = startRow; row < endRow; ++row)
		{
			char* instr = FilePosition(row, col);
			AssignValue(row-startRow, values, instr);
		}
	}
	else // optimize
	{
		char* instr = FilePosition(startRow, col, count);
		int32_t recomputePositionRow = LastRowMapped();

		for (int32_t row = startRow; row < endRow; ++row)
		{
			if (row > recomputePositionRow)
			{
				instr = FilePosition(row, col, count-row);
				recomputePositionRow = LastRowMapped();
			}
			AssignValue(row-startRow, values, instr);
		}
	}
	return endRow-startRow;
}

void DataSet::AssignValue(int32_t index, u_int8_t* values, char*& instr)
{
	values[index] = FileInput::ReadUInt8(instr);
}

void DataSet::AssignValue(int32_t index, int8_t* values, char*& instr)
{
	values[index] = FileInput::ReadInt8(instr);
}

void DataSet::AssignValue(int32_t index, u_int16_t* values, char*& instr)
{
	values[index] = FileInput::ReadUInt16(instr);
}

void DataSet::AssignValue(int32_t index, int16_t* values, char*& instr)
{
	values[index] = FileInput::ReadInt16(instr);
}

void DataSet::AssignValue(int32_t index, u_int32_t* values, char*& instr)
{
	values[index] = FileInput::ReadUInt32(instr);
}

void DataSet::AssignValue(int32_t index, int32_t* values, char*& instr)
{
	values[index] = FileInput::ReadInt32(instr);
}

void DataSet::AssignValue(int32_t index, float* values, char*& instr)
{
	values[index] = FileInput::ReadFloat(instr);
}

void DataSet::AssignValue(int32_t index, std::string* values, char*& instr)
{
	values[index] = FileInput::ReadString8(instr);
}
void DataSet::AssignValue(int32_t index, std::wstring* values, char*& instr)
{
	values[index] = FileInput::ReadString16(instr);
}

int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int8_t* values)
{
	return GetDataRawT(col, startRow, count, values);
}

int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, int8_t* values)
{
	return GetDataRawT(col, startRow, count, values);
}

int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int16_t* values)
{
	return GetDataRawT(col, startRow, count, values);
}

int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, int16_t* values)
{
	return GetDataRawT(col, startRow, count, values);
}

int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, u_int32_t* values)
{
	return GetDataRawT(col, startRow, count, values);
}

int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, int32_t* values)
{
	return GetDataRawT(col, startRow, count, values);
}

int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, float* values)
{
	return GetDataRawT(col, startRow, count, values);
}

int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::string* values)
{
	return GetDataRawT(col, startRow, count, values);
}

int32_t DataSet::GetDataRaw(int32_t col, int32_t startRow, int32_t count, std::wstring* values)
{
	return GetDataRawT(col, startRow, count, values);
}

int32_t DataSet::LastRowMapped()
{
	return (mapLen+(mapStart-header.GetDataStartFilePos()))/BytesPerRow() - 1;
}