/*************************************************************************** cramp.cpp /*************************************************************************** cramp.hpp -- renamed cramp.h to avoid R checker warning A C++ wrapper for the RAMP code. Use this library to parse an mzXML file in a non-sequential way, by taking advantage of the index element. (C) 2004 by Brian Pratt, Insilicos LLC Based on mzXML2Other, which has this copyright: ------------------- begin : Wed Apr 2 copyright : (C) 2002 by Pedrioli Patrick, ISB, Proteomics email : ppatrick@student.ethz.ch ***************************************************************************/ /*************************************************************************** * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU Library or "Lesser" General Public * * License (LGPL) as published by the Free Software Foundation; * * either version 2 of the License, or (at your option) any later * * version. * * * ***************************************************************************/ #include #include #include #include "stdio.h" #if !defined(_MSC_VER) && !defined(__MINGW32__) #include "sys/errno.h" #endif #include "cramp.h" /** * This function performs a non-sequential parsing operation on an indexed * msxml file. * * @param fileName: Name of the msxml file * @param startSCan: Number of the scan we want to read from * @param what: -HEADER will return num, msLevel and retentionTime * -SCAN will return only the peaks * -ALL will return everything found in scan, precursorMz and peaks * * @return pData is dynamically allocate and becomes property of the caller, who * is responsible for its deallocation!! */ cRamp::cRamp( const char* fileName,bool declaredScansOnly ) : m_filename(fileName), m_declaredScansOnly(declaredScansOnly), m_runInfo() { m_handle = rampOpenFile(fileName); m_scanOffsets = NULL; m_runInfo = NULL; m_lastScan = 0; if (!OK()) { // HENRY -- I would prefer this to be silent, and let the caller deals with it // cout << "Error: Could not open file " << fileName << ": " << strerror(errno) << endl; // END HENRY } else { m_runInfo = getRunInfo(); // HENRY -- always read index to set scan count, since scan count // declared at the top of the mzXML file is unreliable now that // there are missing scans. // This will also set the structs m_scanOffsets, and the value m_lastScan // if (m_runInfo->m_data.scanCount < 0) { // undeclared scan count // this will provoke reading of index, which sets scan count rampScanInfo* tmp = getScanHeaderInfo ( 1 ); free(tmp); // } // END HENRY } } cRamp::~cRamp() { rampCloseFile(m_handle); // NB: these pointers may be null on file open failure, // but free/delete of NULL is OK per C++ standard free(m_scanOffsets); delete m_runInfo; // was free() - but allocated with new } // // here are the private guts // rampInfo* cRamp::do_ramp( ramp_fileoffset_t arg , eWhatToRead what ) { switch( what ) { case RAMP_RUNINFO: case RAMP_HEADER: case RAMP_PEAKS: case RAMP_INSTRUMENT: break; // OK default: std::cerr << "unknown read type!\n"; return NULL; break; } rampInfo* returnPtr=NULL; if ((RAMP_RUNINFO != what) && (RAMP_INSTRUMENT != what) && !m_scanOffsets) { int iLastScan = 0; // we need the index to get anything besides the header ramp_fileoffset_t indexOffset = getIndexOffset(m_handle); m_scanOffsets = readIndex(m_handle, indexOffset, &iLastScan); if (iLastScan >= m_runInfo->m_data.scanCount) { if (!m_declaredScansOnly) { m_runInfo->m_data.scanCount = iLastScan; } else { // get rid of all the fake entries created for (int n=1;n<=iLastScan;n++) { // ramp is 1 based if (m_scanOffsets[n]==-1) { // find a run of fakes int m; for (m=n+1;(m<=iLastScan)&&(m_scanOffsets[m]==-1);m++); if (m<=iLastScan) { memmove(m_scanOffsets+n,m_scanOffsets+m, sizeof(ramp_fileoffset_t)*((iLastScan-m)+1)); } iLastScan-=(m-n); } } } } // HENRY - store last scan explicitly. m_lastScan = iLastScan; // END HENRY } // HENRY -- arg is out of bounds. instead of creating havoc in RAMP, let's just kill it here. if (RAMP_RUNINFO != what && (RAMP_INSTRUMENT != what) && (arg > m_runInfo->m_data.scanCount || arg < 1)) { return (NULL); } if (m_scanOffsets || (RAMP_RUNINFO == what) || (RAMP_INSTRUMENT == what)) { ramp_fileoffset_t scanOffset=-1; if (RAMP_RUNINFO == what || RAMP_INSTRUMENT == what) { scanOffset = 0; // read from head of file } else { scanOffset = m_scanOffsets[arg]; // ramp is one-based } if (scanOffset >= 0) { // ----------------------------------------------------------------------- // And now we can parse the info we were looking for // ----------------------------------------------------------------------- // Ok now we have to copy everything in our structure switch( what ) { case RAMP_RUNINFO: returnPtr = new rampRunInfo( m_handle ); break; case RAMP_HEADER: returnPtr = new rampScanInfo( m_handle, scanOffset, (int)arg ); if (returnPtr) { #ifdef HAVE_PWIZ_MZML_LIB if (!m_handle->mzML) // rampadapter already set this for us #endif ((rampScanInfo *)returnPtr)->m_data.filePosition = scanOffset; // for future reference // HENRY -- error checking here if (((rampScanInfo*)returnPtr)->m_data.acquisitionNum < 0) { // something failed in RAMP, possibly because it's a missing scan delete ((rampScanInfo*)returnPtr); returnPtr = NULL; } } break; case RAMP_PEAKS: returnPtr = new rampPeakList( m_handle, scanOffset); // HENRY -- error checking here if (returnPtr && ((rampPeakList*)returnPtr)->getPeakCount() <= 0) { // something failed in RAMP, possibly because it's a missing scan delete ((rampPeakList*)returnPtr); returnPtr = NULL; } break; // HENRY -- add the instrument info reading functionality (present in RAMP, but not provided in cRAMP before) case RAMP_INSTRUMENT: returnPtr = new rampInstrumentInfo(m_handle); if (((rampInstrumentInfo*)returnPtr)->m_instrumentStructPtr == NULL) { delete ((rampInstrumentInfo*)returnPtr); returnPtr = NULL; } break; } } } return returnPtr; } /** * This function performs a non-sequential parsing operation on an indexed * msxml file to obtain minimal info on the msRun contained in the file. * * @return rapRunInfo* is dynamically allocate and becomes property of the caller, who * is responsible for its deallocation!! */ rampRunInfo* cRamp::getRunInfo ( ) { rampRunInfo* result; if (m_runInfo) { // did we derive this already? result = new rampRunInfo(*m_runInfo); } else { result = (rampRunInfo*) do_ramp(0, RAMP_RUNINFO); } return result; } /** * This function performs a non-sequential parsing operation on an indexed * msxml file to obtain minimal header info for a numbered scan (thus minimizing parse time). * * @param fileName: Name of the msxml file * @param startSCan: Number of the scan we want to read from * @return rapHeaderInfo* is dynamically allocate and becomes property of the caller, who * is responsible for its deallocation!! returns just the minimal header info num, msLevel and retentionTime */ rampScanInfo* cRamp::getScanHeaderInfo ( int whichScan ) { return (rampScanInfo*) do_ramp((ramp_fileoffset_t)whichScan, RAMP_HEADER); } /** * This function performs a non-sequential parsing operation on an indexed * msxml file to obtain peak info for a numbered scan. * * @param fileName: Name of the msxml file * @param startSCan: Number of the scan we want to read from * @return rapPeakList* is dynamically allocate and becomes property of the caller, who * is responsible for its deallocation!! returns everything found in scan, precursorMz and peaks */ rampPeakList* cRamp::getPeakList ( int whichScan ) { return (rampPeakList*) do_ramp((ramp_fileoffset_t)whichScan, RAMP_PEAKS); } // HENRY - provides instrument info getting method rampInstrumentInfo* cRamp::getInstrumentInfo () { return (rampInstrumentInfo*) do_ramp(0, RAMP_INSTRUMENT); } // END HENRY // HENRY - sequential access parser that skips over missing scans. This version only reads scan header. bool cRampIterator::nextScan(rampScanInfo** scanInfo) { while (++m_currentScan <= m_cramp.getLastScan() && m_cramp.getScanOffset(m_currentScan) <= 0); if (m_currentScan > m_cramp.getLastScan()) { return (false); } *scanInfo = (rampScanInfo*)m_cramp.do_ramp((ramp_fileoffset_t)(m_currentScan), RAMP_HEADER); return (true); } // END HENRY // HENRY - sequential access parser that skips over missing scans. This version reads both scan header and peak list. bool cRampIterator::nextScan(rampScanInfo** scanInfo, rampPeakList** peakList) { while (++m_currentScan <= m_cramp.getLastScan() && m_cramp.getScanOffset(m_currentScan) <= 0); if (m_currentScan > m_cramp.getLastScan()) { return (false); } *scanInfo = (rampScanInfo*)m_cramp.do_ramp((ramp_fileoffset_t)(m_currentScan), RAMP_HEADER); *peakList = (rampPeakList*)m_cramp.do_ramp((ramp_fileoffset_t)(m_currentScan), RAMP_PEAKS); return (true); } // END HENRY // HENRY - resets the sequential access parser to the first scan. void cRampIterator::reset() { m_currentScan = 1; } // END HENRY /** * populate from a file handle **/ rampPeakList::rampPeakList(RAMPFILE *handle, ramp_fileoffset_t index) { init(); m_peaksCount = readPeaksCount(handle,index); m_pPeaks = (rampPeakInfoStruct *)readPeaks(handle,index); } /** * populate from a file handle **/ rampScanInfo::rampScanInfo(RAMPFILE *handle, ramp_fileoffset_t index, int seqNum) { init(); readHeader(handle,index,&m_data); m_data.seqNum = seqNum; } /** * populate from a file handle **/ rampRunInfo::rampRunInfo(RAMPFILE *handle) { init(); readMSRun(handle,&m_data); } // HENRY - provides instrument info reading functionality rampInstrumentInfo::rampInstrumentInfo(RAMPFILE *handle) { init(); m_instrumentStructPtr = getInstrumentStruct(handle); } // END HENRY