60c84217 |
/***************************************************************************
cramp.cpp
/***************************************************************************
cramp.hpp -- renamed cramp.h to avoid R checker warning
A C++ wrapper for the RAMP code.
Use this library to parse an mzXML file in a non-sequential way, by
taking advantage of the index element.
(C) 2004 by Brian Pratt, Insilicos LLC
Based on mzXML2Other, which has this copyright:
-------------------
begin : Wed Apr 2
copyright : (C) 2002 by Pedrioli Patrick, ISB, Proteomics
email : ppatrick@student.ethz.ch
***************************************************************************/
/***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU Library or "Lesser" General Public *
* License (LGPL) as published by the Free Software Foundation; *
* either version 2 of the License, or (at your option) any later *
* version. *
* *
***************************************************************************/
#include <stdlib.h>
#include <iostream>
#include <fstream>
#include "stdio.h"
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include "sys/errno.h"
#endif
#include "cramp.h"
/**
 * Opens a file through RAMP and primes this reader for random access.
 *
 * If the open succeeds, the run info is read and cached, and the header of
 * scan 1 is requested once so that do_ramp() reads the scan index. Reading
 * the index fills m_scanOffsets and m_lastScan and may correct the scan
 * count held in the cached run info.
 *
 * If the open fails the object is left empty and nothing is reported here;
 * the caller is expected to test OK().
 *
 * @param fileName: name of the file to open
 * @param declaredScansOnly: when false, the scan count is raised to the last
 *        scan number found in the index; when true, index entries marked -1
 *        are squeezed out of the offset table instead (see do_ramp())
 */
cRamp::cRamp( const char* fileName,bool declaredScansOnly ) :
  m_filename(fileName), m_declaredScansOnly(declaredScansOnly), m_runInfo()
{
  m_handle = rampOpenFile(fileName);
  m_scanOffsets = NULL;
  m_runInfo = NULL;
  m_lastScan = 0;
  if (!OK()) {
    // HENRY -- deliberately silent on open failure; the caller deals with it.
    return;
  }
  m_runInfo = getRunInfo();
  // HENRY -- always read the index to set the scan count, since the scan
  // count declared at the top of the mzXML file is unreliable now that
  // there are missing scans. Asking for any scan header provokes the index
  // read, which also sets m_scanOffsets and m_lastScan.
  rampScanInfo* tmp = getScanHeaderInfo ( 1 );
  // The header object comes from "new" inside do_ramp(), so it has to be
  // released with delete (it used to be passed to free()). May be NULL.
  delete tmp;
}
cRamp::~cRamp() {
  // Close the underlying RAMP file first, then drop what we cached from it.
  rampCloseFile(m_handle);
  // Both pointers are still NULL when the file never opened; delete and
  // free() each accept NULL, so no checks are needed.
  delete m_runInfo;     // created with new, hence delete rather than free()
  free(m_scanOffsets);  // scan offset table obtained from readIndex()
}
//
// here are the private guts
//
/**
 * Shared reader used by getRunInfo(), getScanHeaderInfo(), getPeakList(),
 * getInstrumentInfo() and cRampIterator.
 *
 * Run info and instrument info are read from the head of the file. Scan
 * headers and peak lists are located through the scan index, which is read
 * on the first scan-level request and kept in m_scanOffsets.
 *
 * @param arg: 1-based scan number for RAMP_HEADER and RAMP_PEAKS; not used
 *        for RAMP_RUNINFO and RAMP_INSTRUMENT
 * @param what: which kind of record to read
 * @return an object created with new that the caller must delete, or NULL
 *         when the read type is unknown, the file is not open, the scan
 *         number is out of range, the index has no offset for the scan, or
 *         the record read back looks invalid
 */
rampInfo* cRamp::do_ramp( ramp_fileoffset_t arg , eWhatToRead what )
{
  switch( what ) {
  case RAMP_RUNINFO:
  case RAMP_HEADER:
  case RAMP_PEAKS:
  case RAMP_INSTRUMENT:
    break; // OK
  default:
    std::cerr << "unknown read type!\n";
    return NULL;
  }

  // Nothing can be read if the file failed to open; bail out instead of
  // handing an unusable handle to RAMP.
  if (!OK()) {
    return NULL;
  }

  const bool fileLevelRead = (RAMP_RUNINFO == what) || (RAMP_INSTRUMENT == what);
  ramp_fileoffset_t scanOffset = 0; // file-level records are read from head of file

  if (!fileLevelRead) {
    // The scan count used below lives in the cached run info.
    if (!m_runInfo) {
      return NULL;
    }

    if (!m_scanOffsets) {
      int iLastScan = 0;
      // we need the index to get anything besides the header
      ramp_fileoffset_t indexOffset = getIndexOffset(m_handle);
      m_scanOffsets = readIndex(m_handle, indexOffset, &iLastScan);
      if (iLastScan >= m_runInfo->m_data.scanCount) {
        if (!m_declaredScansOnly) {
          m_runInfo->m_data.scanCount = iLastScan;
        } else { // get rid of all the fake entries created
          for (int n=1;n<=iLastScan;n++) { // ramp is 1 based
            if (m_scanOffsets[n]==-1) {
              // find a run of fakes
              int m;
              for (m=n+1;(m<=iLastScan)&&(m_scanOffsets[m]==-1);m++);
              if (m<=iLastScan) {
                // slide the remaining entries down over the run of fakes
                memmove(m_scanOffsets+n,m_scanOffsets+m,
                        sizeof(ramp_fileoffset_t)*((iLastScan-m)+1));
              }
              iLastScan-=(m-n);
            }
          }
        }
      }
      // HENRY - store last scan explicitly.
      m_lastScan = iLastScan;
      // END HENRY
    }

    // HENRY -- arg is out of bounds. instead of creating havoc in RAMP, let's just kill it here.
    // The scan number must also lie inside the index that was actually read:
    // the declared scan count can exceed it, and indexing m_scanOffsets past
    // m_lastScan would read stale or out-of-range entries.
    if (arg < 1 || arg > m_runInfo->m_data.scanCount || arg > m_lastScan) {
      return NULL;
    }
    if (!m_scanOffsets) {
      return NULL; // no index available
    }
    scanOffset = m_scanOffsets[arg]; // ramp is one-based
    if (scanOffset < 0) {
      return NULL; // no offset recorded for this scan
    }
  }

  // -----------------------------------------------------------------------
  // And now we can parse the info we were looking for
  // -----------------------------------------------------------------------
  rampInfo* returnPtr = NULL;
  switch( what ) {
  case RAMP_RUNINFO:
    returnPtr = new rampRunInfo( m_handle );
    break;
  case RAMP_HEADER: {
    rampScanInfo* header = new rampScanInfo( m_handle, scanOffset, (int)arg );
#ifdef HAVE_PWIZ_MZML_LIB
    if (!m_handle->mzML) // rampadapter already set this for us
#endif
      header->m_data.filePosition = scanOffset; // for future reference
    // HENRY -- error checking here
    if (header->m_data.acquisitionNum < 0) {
      // something failed in RAMP, possibly because it's a missing scan
      delete header;
      header = NULL;
    }
    returnPtr = header;
    break;
  }
  case RAMP_PEAKS: {
    rampPeakList* peaks = new rampPeakList( m_handle, scanOffset );
    // HENRY -- error checking here
    if (peaks->getPeakCount() <= 0) {
      // something failed in RAMP, possibly because it's a missing scan
      delete peaks;
      peaks = NULL;
    }
    returnPtr = peaks;
    break;
  }
  // HENRY -- add the instrument info reading functionality (present in RAMP, but not provided in cRAMP before)
  case RAMP_INSTRUMENT: {
    rampInstrumentInfo* instrument = new rampInstrumentInfo( m_handle );
    if (instrument->m_instrumentStructPtr == NULL) {
      delete instrument;
      instrument = NULL;
    }
    returnPtr = instrument;
    break;
  }
  default:
    break; // unreachable: unknown read types were rejected above
  }
  return returnPtr;
}
/**
 * Returns the run-level information for the open file.
 *
 * Once the run info has been cached, a copy of the cached object is handed
 * out; otherwise it is read from the file through do_ramp().
 *
 * @return rampRunInfo* is dynamically allocated and becomes property of the
 *         caller, who is responsible for its deallocation!!
 */
rampRunInfo* cRamp::getRunInfo ( ) {
  if (m_runInfo != NULL) {
    // already derived: give the caller an independent copy
    return new rampRunInfo(*m_runInfo);
  }
  rampInfo* fresh = do_ramp(0, RAMP_RUNINFO);
  return (rampRunInfo*) fresh;
}
/**
 * Reads only the header of one numbered scan, using the scan index so the
 * rest of the file does not have to be parsed.
 *
 * @param whichScan: 1-based number of the scan whose header is wanted
 * @return rampScanInfo* is dynamically allocated and becomes property of the
 *         caller, who is responsible for its deallocation!! The result is
 *         whatever do_ramp() yields for RAMP_HEADER, which is NULL when no
 *         header could be produced for that scan.
 */
rampScanInfo* cRamp::getScanHeaderInfo ( int whichScan ) {
  const ramp_fileoffset_t scanNumber = (ramp_fileoffset_t)whichScan;
  rampInfo* header = do_ramp(scanNumber, RAMP_HEADER);
  return (rampScanInfo*) header;
}
/**
 * Reads the peak list of one numbered scan, using the scan index to seek
 * directly to it.
 *
 * @param whichScan: 1-based number of the scan whose peaks are wanted
 * @return rampPeakList* is dynamically allocated and becomes property of the
 *         caller, who is responsible for its deallocation!! The result is
 *         whatever do_ramp() yields for RAMP_PEAKS, which is NULL when no
 *         peak list could be produced for that scan.
 */
rampPeakList* cRamp::getPeakList ( int whichScan ) {
  const ramp_fileoffset_t scanNumber = (ramp_fileoffset_t)whichScan;
  rampInfo* peaks = do_ramp(scanNumber, RAMP_PEAKS);
  return (rampPeakList*) peaks;
}
// HENRY - provides instrument info getting method
/**
 * Reads the instrument description from the head of the file.
 *
 * @return rampInstrumentInfo* created with new and owned by the caller, or
 *         NULL when do_ramp() could not obtain an instrument record
 */
rampInstrumentInfo* cRamp::getInstrumentInfo () {
  rampInfo* instrument = do_ramp(0, RAMP_INSTRUMENT);
  return (rampInstrumentInfo*) instrument;
}
// END HENRY
// HENRY - sequential access parser that skips over missing scans. This version only reads scan header.
/**
 * Advances to the next scan that has a positive offset in the index and
 * reads its header.
 *
 * @param scanInfo: receives the header object (created with new, owned by
 *        the caller); left untouched when false is returned. It can be set
 *        to NULL even on a true return if do_ramp() fails to read the header.
 * @return false once the iterator has moved past the last scan, true otherwise
 */
bool cRampIterator::nextScan(rampScanInfo** scanInfo) {
  // step forward at least once, then keep stepping over scans without an offset
  do {
    ++m_currentScan;
  } while (m_currentScan <= m_cramp.getLastScan()
           && m_cramp.getScanOffset(m_currentScan) <= 0);

  if (m_currentScan > m_cramp.getLastScan()) {
    return false; // ran off the end of the index
  }

  rampInfo* header = m_cramp.do_ramp((ramp_fileoffset_t)(m_currentScan), RAMP_HEADER);
  *scanInfo = (rampScanInfo*)header;
  return true;
}
// END HENRY
// HENRY - sequential access parser that skips over missing scans. This version reads both scan header and peak list.
/**
 * Advances to the next scan that has a positive offset in the index and
 * reads both its header and its peak list.
 *
 * @param scanInfo: receives the header object (created with new, owned by
 *        the caller)
 * @param peakList: receives the peak list object (created with new, owned
 *        by the caller)
 * Neither output is written when false is returned. Either one can be set
 * to NULL even on a true return if do_ramp() fails to read that record.
 * @return false once the iterator has moved past the last scan, true otherwise
 */
bool cRampIterator::nextScan(rampScanInfo** scanInfo, rampPeakList** peakList) {
  // step forward at least once, then keep stepping over scans without an offset
  do {
    ++m_currentScan;
  } while (m_currentScan <= m_cramp.getLastScan()
           && m_cramp.getScanOffset(m_currentScan) <= 0);

  if (m_currentScan > m_cramp.getLastScan()) {
    return false; // ran off the end of the index
  }

  // header first, then peaks, both for the scan we just landed on
  rampInfo* header = m_cramp.do_ramp((ramp_fileoffset_t)(m_currentScan), RAMP_HEADER);
  *scanInfo = (rampScanInfo*)header;
  rampInfo* peaks = m_cramp.do_ramp((ramp_fileoffset_t)(m_currentScan), RAMP_PEAKS);
  *peakList = (rampPeakList*)peaks;
  return true;
}
// END HENRY
// HENRY - resets the sequential access parser to the first scan.
/**
 * Rewinds the iterator so that the next call to nextScan() starts looking
 * at scan 1.
 *
 * nextScan() increments m_currentScan before it examines a scan, and scan
 * numbers are 1-based, so the rewind position has to be 0. Rewinding to 1
 * made the following nextScan() begin at scan 2 and skip the first scan.
 */
void cRampIterator::reset() {
  m_currentScan = 0;
}
// END HENRY
/**
 * Populate a peak list from a file handle.
 *
 * @param handle: open RAMP file to read from
 * @param index: file offset of the scan whose peaks are wanted
 **/
rampPeakList::rampPeakList(RAMPFILE *handle, ramp_fileoffset_t index) {
  init();
  // the count is read first, then the peak data itself
  this->m_peaksCount = readPeaksCount(handle, index);
  this->m_pPeaks = (rampPeakInfoStruct *) readPeaks(handle, index);
}
/**
 * Populate a scan header from a file handle.
 *
 * @param handle: open RAMP file to read from
 * @param index: file offset of the scan
 * @param seqNum: sequence number to record for this scan
 **/
rampScanInfo::rampScanInfo(RAMPFILE *handle, ramp_fileoffset_t index, int seqNum) {
  init();
  readHeader(handle, index, &(this->m_data));
  // recorded only after readHeader() has been handed m_data
  this->m_data.seqNum = seqNum;
}
/**
 * Populate the run info from a file handle.
 *
 * @param handle: open RAMP file to read from
 **/
rampRunInfo::rampRunInfo(RAMPFILE *handle) {
  init();
  // let RAMP fill in the run-level data
  readMSRun(handle, &(this->m_data));
}
// HENRY - provides instrument info reading functionality
/**
 * Populate the instrument info from a file handle.
 *
 * m_instrumentStructPtr holds whatever getInstrumentStruct() returns;
 * do_ramp() treats a NULL there as "no instrument info available".
 *
 * @param handle: open RAMP file to read from
 **/
rampInstrumentInfo::rampInstrumentInfo(RAMPFILE *handle) {
  init();
  this->m_instrumentStructPtr = getInstrumentStruct(handle);
}
// END HENRY
|