#include "RcppIdent.h"

// Creates an RcppIdent with no identification file attached: the mzid
// handle starts out as NULL.
RcppIdent::RcppIdent()
    : mzid(NULL)
{
}

void RcppIdent::open(const string& fileName)
{
    filename = fileName;
    mzid = new IdentDataFile(fileName);
}

// Summarises the file-level metadata of the opened identification file.
//
// Returns a named list with the entries
//   FileProvider, CreationDate, software, ModificationSearched,
//   FragmentTolerance, ParentTolerance, enzymes, SpectraSource.
// "software" holds one "name version contact" string per analysis software,
// "enzymes" is itself a list of per-enzyme columns and "SpectraSource" holds
// the spectra data locations.
//
// Tolerances, searched modifications and enzymes are taken from the first
// SpectrumIdentificationProtocol only; when the file declares no protocol
// these entries are returned empty rather than indexing an empty vector.
//
// Side effect: overwrites provider and date (neither is local to this
// function).
Rcpp::List RcppIdent::getIDInfo(  )
{
    // The provider's contact role is optional; use "" when it is absent.
    provider = (mzid->provider.contactRolePtr.get()!=0?mzid->provider.contactRolePtr.get()->name():"");
    date = mzid->creationDate;

    const vector<AnalysisSoftwarePtr>& as = mzid->analysisSoftwareList;
    Rcpp::StringVector software(as.size());

    for (size_t i = 0; i < as.size(); i++)
    {
        // Both the contact role and the contact it points to may be missing.
        string contact;
        if (as[i]->contactRolePtr.get() != 0 && as[i]->contactRolePtr->contactPtr.get() != 0)
        {
            contact = as[i]->contactRolePtr->contactPtr->name;
        }
        software[i] = as[i]->name + " " + as[i]->version + " " + contact;
    }

    string fragmentTolerance = "";
    string parentTolerance = "";
    vector<SearchModificationPtr> sm;
    vector<EnzymePtr> enz;

    const vector<SpectrumIdentificationProtocolPtr>& sip = mzid->analysisProtocolCollection.spectrumIdentificationProtocol;
    if (!sip.empty())
    {
        // Test cvParams itself: it is what gets indexed below.
        if (!sip[0]->fragmentTolerance.cvParams.empty())
        {
            fragmentTolerance = sip[0]->fragmentTolerance.cvParams[0].value + " " + sip[0]->fragmentTolerance.cvParam(MS_search_tolerance_plus_value).unitsName();
        }

        if (!sip[0]->parentTolerance.cvParams.empty())
        {
            parentTolerance = sip[0]->parentTolerance.cvParams[0].value + " " + sip[0]->parentTolerance.cvParam(MS_search_tolerance_plus_value).unitsName();
        }

        sm = sip[0]->modificationParams;
        enz = sip[0]->enzymes.enzymes;
    }

    Rcpp::StringVector mod(sm.size());
    for (size_t i = 0; i < sm.size(); i++)
    {
        // A search modification without any cvParam has no CV term name.
        if (sm[i]->cvParams.empty())
        {
            mod[i] = "";
        }
        else
        {
            mod[i] = cvTermInfo(sm[i]->cvParams[0].cvid).name;
        }
    }

    Rcpp::List enzymes;
    Rcpp::StringVector name(enz.size());
    Rcpp::StringVector nTermGain(enz.size());
    Rcpp::StringVector cTermGain(enz.size());
    Rcpp::StringVector minDistance(enz.size());
    Rcpp::StringVector missedCleavages(enz.size());

    for (size_t i = 0; i < enz.size(); i++)
    {
        name[i] = cvTermInfo(cleavageAgent(*enz[i].get())).name;
        nTermGain[i] = enz[i]->nTermGain;
        cTermGain[i] = enz[i]->cTermGain;
        minDistance[i] = enz[i]->minDistance;
        missedCleavages[i] = enz[i]->missedCleavages;
    }

    enzymes = Rcpp::List::create(
                  Rcpp::_["name"]  = name,
                  Rcpp::_["nTermGain"]  = nTermGain,
                  Rcpp::_["cTermGain"]  = cTermGain,
                  Rcpp::_["minDistance"]  = minDistance,
                  Rcpp::_["missedCleavages"]  = missedCleavages
              );

    const vector<SpectraDataPtr>& sd = mzid->dataCollection.inputs.spectraData;
    Rcpp::StringVector spectra(sd.size());
    for (size_t i = 0; i < sd.size(); i++)
    {
        spectra[i] = sd[i]->location;
    }

    return Rcpp::List::create(
               Rcpp::_["FileProvider"]	= provider,
               Rcpp::_["CreationDate"]	= date,
               Rcpp::_["software"]	= software,
               Rcpp::_["ModificationSearched"]	= mod,
               Rcpp::_["FragmentTolerance"]	= fragmentTolerance,
               Rcpp::_["ParentTolerance"]	= parentTolerance,
               Rcpp::_["enzymes"]	= enzymes,
               Rcpp::_["SpectraSource"]	= spectra
           );

}

// Flattens the peptide-spectrum matches of the first SpectrumIdentification
// into a data frame.
//
// One row is produced per (identification result, identification item,
// peptide evidence) combination.  Columns: spectrumID, chargeState, rank,
// passThreshold, experimentalMassToCharge, calculatedMassToCharge, sequence,
// peptideRef, modNum, isDecoy, post, pre, start, end, DatabaseAccess,
// DBseqLength, DatabaseSeq, DatabaseDescription.
//
// Missing optional references are reported as empty strings / 0 so that
// every column always has the same number of rows.  A file without any
// SpectrumIdentification (or without a result list) yields zero rows.
Rcpp::DataFrame RcppIdent::getPsmInfo(  )
{
    vector<SpectrumIdentificationResultPtr> spectrumIdResult;
    if (!mzid->analysisCollection.spectrumIdentification.empty()
            && mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr.get() != 0)
    {
        spectrumIdResult = mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr->spectrumIdentificationResult;
    }

    std::vector<std::string> spectrumID;
    std::vector<int> chargeState;
    std::vector<int> rank;
    std::vector<double> experimentalMassToCharge;
    std::vector<double> calculatedMassToCharge;
    std::vector<std::string> seq;
    std::vector<std::string> peptideRef;
    std::vector<int> modification;
    std::vector<bool> isDecoy;
    std::vector<bool> passThreshold;
    std::vector<std::string> post;
    std::vector<std::string> pre;
    std::vector<int> start;
    std::vector<int> end;
    std::vector<std::string> DBSequenceID;
    std::vector<std::string> DBseq;
    std::vector<int> DBSequenceLen;
    std::vector<std::string> DBdesc;

    for (size_t i = 0; i < spectrumIdResult.size(); i++)
    {
        const vector<SpectrumIdentificationItemPtr>& items = spectrumIdResult[i]->spectrumIdentificationItem;
        for (size_t j = 0; j < items.size(); j++)
        {
            const SpectrumIdentificationItemPtr& item = items[j];
            const bool hasPeptide = item->peptidePtr.get() != 0;

            for (size_t k = 0; k < item->peptideEvidencePtr.size(); k++)
            {
                const PeptideEvidencePtr& evidence = item->peptideEvidencePtr[k];

                spectrumID.push_back(spectrumIdResult[i]->spectrumID);
                chargeState.push_back(item->chargeState);
                rank.push_back(item->rank);
                passThreshold.push_back(item->passThreshold);
                experimentalMassToCharge.push_back(item->experimentalMassToCharge);
                calculatedMassToCharge.push_back(item->calculatedMassToCharge);

                // An item without a peptide reference still gets a row.
                if (hasPeptide)
                {
                    seq.push_back(item->peptidePtr->peptideSequence);
                    peptideRef.push_back(item->peptidePtr->id);
                    modification.push_back(static_cast<int>(item->peptidePtr->modification.size()));
                }
                else
                {
                    seq.push_back("");
                    peptideRef.push_back("");
                    modification.push_back(0);
                }

                isDecoy.push_back(evidence->isDecoy);
                pre.push_back(string(1, evidence->pre));
                post.push_back(string(1, evidence->post));
                start.push_back(evidence->start);
                end.push_back(evidence->end);

                if (evidence->dbSequencePtr.get() != 0)
                {
                    const DBSequencePtr& db = evidence->dbSequencePtr;
                    DBSequenceID.push_back(db->accession);
                    DBSequenceLen.push_back(db->length);
                    DBseq.push_back(db->seq);
                    // The description is taken from the first cvParam, if any.
                    if (db->cvParams.size() > 0)
                    {
                        DBdesc.push_back(db->cvParams[0].value);
                    }
                    else
                    {
                        DBdesc.push_back("");
                    }
                }
                else
                {
                    // Every DB column needs a placeholder here, including the
                    // length; otherwise DBseqLength ends up shorter than the
                    // other columns of the data frame.
                    DBSequenceID.push_back("");
                    DBSequenceLen.push_back(0);
                    DBseq.push_back("");
                    DBdesc.push_back("");
                }
            }
        }
    }

    return Rcpp::DataFrame::create(
               Rcpp::_["spectrumID"]	= spectrumID,
               Rcpp::_["chargeState"]	= chargeState,
               Rcpp::_["rank"]	= rank,
               Rcpp::_["passThreshold"]	= passThreshold,
               Rcpp::_["experimentalMassToCharge"]	= experimentalMassToCharge,
               Rcpp::_["calculatedMassToCharge"]	= calculatedMassToCharge,
               Rcpp::_["sequence"]	= seq,
               Rcpp::_["peptideRef"]	= peptideRef,
               Rcpp::_["modNum"]	= modification,
               Rcpp::_["isDecoy"]	= isDecoy,
               Rcpp::_["post"]	= post,
               Rcpp::_["pre"]	= pre,
               Rcpp::_["start"]	= start,
               Rcpp::_["end"]	= end,
               Rcpp::_["DatabaseAccess"]	= DBSequenceID,
               Rcpp::_["DBseqLength"]	= DBSequenceLen,
               Rcpp::_["DatabaseSeq"]	= DBseq,
               Rcpp::_["DatabaseDescription"]	= DBdesc
           );
}

// Lists the peptide modifications found in the first SpectrumIdentification.
//
// One row is produced per modification of every identification item's
// peptide.  Columns: spectrumID, sequence, peptideRef, name, mass, location.
// "name" is the CV term name of the modification's first cvParam, or "" when
// the modification carries no cvParam at all.  Items without a peptide
// reference contribute no rows; a file without any SpectrumIdentification
// (or without a result list) yields zero rows.
Rcpp::DataFrame RcppIdent::getModInfo(  )
{
    vector<SpectrumIdentificationResultPtr> spectrumIdResult;
    if (!mzid->analysisCollection.spectrumIdentification.empty()
            && mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr.get() != 0)
    {
        spectrumIdResult = mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr->spectrumIdentificationResult;
    }

    vector<string> spectrumID;
    vector<string> seq;
    vector<string> peptideRef;
    vector<string> name;
    vector<double> mass;
    vector<int> loc;

    for (size_t i = 0; i < spectrumIdResult.size(); i++)
    {
        const vector<SpectrumIdentificationItemPtr>& items = spectrumIdResult[i]->spectrumIdentificationItem;
        for (size_t k = 0; k < items.size(); k++)
        {
            const PeptidePtr& peptide = items[k]->peptidePtr;
            if (peptide.get() == 0)
            {
                continue;
            }

            const vector<ModificationPtr>& mods = peptide->modification;
            for (size_t j = 0; j < mods.size(); j++)
            {
                spectrumID.push_back(spectrumIdResult[i]->spectrumID);
                seq.push_back(peptide->peptideSequence);
                peptideRef.push_back(peptide->id);

                // Indexing cvParams[0] on a modification without cvParams
                // would read past the end of an empty vector.
                if (mods[j]->cvParams.empty())
                {
                    name.push_back("");
                }
                else
                {
                    name.push_back(cvTermInfo(mods[j]->cvParams[0].cvid).name);
                }

                mass.push_back(mods[j]->monoisotopicMassDelta);
                loc.push_back(mods[j]->location);
            }
        }
    }

    return Rcpp::DataFrame::create(
               Rcpp::_["spectrumID"]	= spectrumID,
               Rcpp::_["sequence"]	= seq,
               Rcpp::_["peptideRef"]	= peptideRef,
               Rcpp::_["name"]	= name,
               Rcpp::_["mass"]	= mass,
               Rcpp::_["location"]	= loc);

}

// Lists the substitution modifications found in the first
// SpectrumIdentification.
//
// One row is produced per substitution modification.  Columns: spectrumID,
// sequence, originalResidue, replacementResidue, location.
//
// Only the first identification item (index 0) of each result is inspected.
// NOTE(review): getModInfo() walks every item of a result; confirm whether
// looking at the first item only is intended here.
//
// Results without any identification item, or whose first item has no
// peptide reference, contribute no rows.
Rcpp::DataFrame RcppIdent::getSubInfo(  )
{
    vector<SpectrumIdentificationResultPtr> spectrumIdResult;
    if (!mzid->analysisCollection.spectrumIdentification.empty()
            && mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr.get() != 0)
    {
        spectrumIdResult = mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr->spectrumIdentificationResult;
    }

    vector<string> spectrumID;
    std::vector<std::string> seq;
    std::vector<char> originalResidue;
    std::vector<char> replacementResidue;
    std::vector<int> loc;

    for (size_t i = 0; i < spectrumIdResult.size(); i++)
    {
        const vector<SpectrumIdentificationItemPtr>& items = spectrumIdResult[i]->spectrumIdentificationItem;

        // Guard the [0] access and the peptide dereference below.
        if (items.empty() || items[0]->peptidePtr.get() == 0)
        {
            continue;
        }

        const PeptidePtr& peptide = items[0]->peptidePtr;
        const vector<SubstitutionModificationPtr>& subs = peptide->substitutionModification;
        for (size_t j = 0; j < subs.size(); j++)
        {
            spectrumID.push_back(spectrumIdResult[i]->spectrumID);
            seq.push_back(peptide->peptideSequence);
            originalResidue.push_back(subs[j]->originalResidue);
            replacementResidue.push_back(subs[j]->replacementResidue);
            loc.push_back(subs[j]->location);
        }
    }

    return Rcpp::DataFrame::create(
               Rcpp::_["spectrumID"]	= spectrumID,
               Rcpp::_["sequence"]	= seq,
               Rcpp::_["originalResidue"]	= originalResidue,
               Rcpp::_["replacementResidue"]	= replacementResidue,
               Rcpp::_["location"]	= loc
           );

}

// Returns the numeric scores attached to the identification items of the
// first SpectrumIdentification.
//
// The score columns are defined by the very first identification item:
// every cvParam of that item that has a non-empty value becomes a column,
// named after its CV term.  Each row (one per item / peptide evidence pair)
// then reads the cvParams at those same positions.  A position that does not
// exist in an item, or whose value is empty, is reported as NA so that all
// columns always have the same length.
//
// Prints a message and returns an empty data frame when no score column can
// be determined.  lexical_cast<double> is applied to each value, so a
// non-numeric value at a score position makes the call fail.
Rcpp::DataFrame RcppIdent::getScore(  ) {
  vector<SpectrumIdentificationResultPtr> spectrumIdResult;
  if (!mzid->analysisCollection.spectrumIdentification.empty()
      && mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr.get() != 0) {
    spectrumIdResult = mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr->spectrumIdentificationResult;
  }

  vector<string> spectrumID;
  vector<string> names;
  // Positions, within an item's cvParams, of the values used as scores.
  vector<size_t> scoreIndex;

  if (!spectrumIdResult.empty() && !spectrumIdResult[0]->spectrumIdentificationItem.empty()) {
    const SpectrumIdentificationItemPtr& first = spectrumIdResult[0]->spectrumIdentificationItem[0];
    for (size_t i = 0; i < first->cvParams.size(); i++) {
      if (!first->cvParams[i].value.empty()) {
        scoreIndex.push_back(i);
        names.push_back(cvTermInfo(first->cvParams[i].cvid).name);
      }
    }
  }

  if (scoreIndex.empty()) {
    Rcpp::Rcout << "No scoring information available" << std::endl;
    return Rcpp::DataFrame::create();
  }

  vector<vector<double> > score(scoreIndex.size());
  for (size_t i = 0; i < spectrumIdResult.size(); i++) {
    const vector<SpectrumIdentificationItemPtr>& items = spectrumIdResult[i]->spectrumIdentificationItem;
    for (size_t k = 0; k < items.size(); k++) {
      const SpectrumIdentificationItemPtr& item = items[k];
      for (size_t n = 0; n < item->peptideEvidencePtr.size(); n++) {
        spectrumID.push_back(spectrumIdResult[i]->spectrumID);

        // Only the positions recorded from the first item are read.  Some
        // SpectrumIdentificationItems suddenly carry additional cvParams,
        // such as in Mascot results - see
        // https://github.com/sneumann/mzR/issues/136 - and others may carry
        // fewer, so each position is bounds-checked and padded with NA.
        for (size_t c = 0; c < scoreIndex.size(); c++) {
          const size_t j = scoreIndex[c];
          if (j < item->cvParams.size() && !item->cvParams[j].value.empty()) {
            score[c].push_back(lexical_cast<double>(item->cvParams[j].value));
          } else {
            score[c].push_back(NA_REAL);
          }
        }
      }
    }
  }

  Rcpp::List res(score.size() + 1);
  names.insert(names.begin(), "spectrumID");
  res[0] = Rcpp::wrap(spectrumID);
  for (size_t i = 0; i < score.size(); i++) {
    res[i + 1] = Rcpp::wrap(score[i]);
  }

  res.attr("names") = names;
  Rcpp::DataFrame out(res);

  return out;
}

// Returns the spectrum-level cvParams of the identification results of the
// first SpectrumIdentification, as strings.
//
// The columns are defined by the very first result: every cvParam of that
// result that has a non-empty value becomes a column, named after its CV
// term.  Each row (one per result) then reads the cvParams at those same
// positions.  A position that does not exist in a result, or whose value is
// empty, is reported as NA.  This keeps all columns the same length and
// avoids writing past the column list when a later result carries more
// cvParams than the first one.
//
// Prints a message and returns an empty data frame when no column can be
// determined.
Rcpp::DataFrame RcppIdent::getSpecParams(  )
{
    vector<SpectrumIdentificationResultPtr> spectrumIdResult;
    if (!mzid->analysisCollection.spectrumIdentification.empty()
            && mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr.get() != 0)
    {
        spectrumIdResult = mzid->analysisCollection.spectrumIdentification[0]->spectrumIdentificationListPtr->spectrumIdentificationResult;
    }

    vector<string> spectrumID;
    vector<string> names;
    // Positions, within a result's cvParams, of the values that are reported.
    vector<size_t> paramIndex;

    if (!spectrumIdResult.empty())
    {
        for (size_t i = 0; i < spectrumIdResult[0]->cvParams.size(); i++)
        {
            if (!spectrumIdResult[0]->cvParams[i].value.empty())
            {
                paramIndex.push_back(i);
                names.push_back(cvTermInfo(spectrumIdResult[0]->cvParams[i].cvid).name);
            }
        }
    }

    if (paramIndex.empty())
    {
        Rcpp::Rcout << "No spectrum cvParams available" << std::endl;
        return Rcpp::DataFrame::create();
    }

    const size_t nRows = spectrumIdResult.size();
    for (size_t i = 0; i < nRows; i++)
    {
        spectrumID.push_back(spectrumIdResult[i]->spectrumID);
    }

    Rcpp::List res(paramIndex.size() + 1);
    names.insert(names.begin(), "spectrumID");
    res[0] = Rcpp::wrap(spectrumID);

    // Build one column at a time; each column is a freshly allocated vector.
    for (size_t c = 0; c < paramIndex.size(); c++)
    {
        const size_t j = paramIndex[c];
        Rcpp::StringVector column(nRows);
        for (size_t i = 0; i < nRows; i++)
        {
            if (j < spectrumIdResult[i]->cvParams.size() && !spectrumIdResult[i]->cvParams[j].value.empty())
            {
                column[i] = spectrumIdResult[i]->cvParams[j].value;
            }
            else
            {
                column[i] = NA_STRING;
            }
        }
        res[c + 1] = column;
    }

    res.attr("names") = names;
    Rcpp::DataFrame out(res);

    return out;
}

// Returns the search parameters of the first SpectrumIdentificationProtocol
// as a named list.
//
// Entries, in order:
//   - "searchType": CV term name of the protocol's search type;
//   - one entry per additional-search cvParam, named after its CV term and
//     set to "true";
//   - one entry per additional-search userParam, named after the parameter
//     and set to its value, or to "true" when the value is empty.
// All names are passed through underscore().  Each value is converted with
// the file's isNumber()/isBool() helpers: numbers become doubles, booleans
// become logicals, everything else stays a string.
//
// When the file declares no protocol, an empty list is returned instead of
// indexing an empty vector.
Rcpp::List RcppIdent::getPara(  )
{
    const std::vector<SpectrumIdentificationProtocolPtr>& sip = mzid->analysisProtocolCollection.spectrumIdentificationProtocol;
    std::vector<std::string> names, values;

    if (!sip.empty())
    {
        names.push_back("searchType");
        values.push_back(underscore(cvTermInfo(sip[0]->searchType.cvid).name));

        // size_t indices: the container sizes are unsigned.
        for (size_t i = 0; i < sip[0]->additionalSearchParams.cvParams.size(); i++)
        {
            names.push_back(underscore(cvTermInfo(sip[0]->additionalSearchParams.cvParams[i].cvid).name));
            values.push_back("true");
        }

        for (size_t i = 0; i < sip[0]->additionalSearchParams.userParams.size(); i++)
        {
            names.push_back(underscore(sip[0]->additionalSearchParams.userParams[i].name));
            if (sip[0]->additionalSearchParams.userParams[i].value.empty())
            {
                // A userParam without a value is treated as a set flag.
                values.push_back("true");
            }
            else
            {
                values.push_back(sip[0]->additionalSearchParams.userParams[i].value);
            }
        }
    }

    Rcpp::List res(names.size());

    for (size_t i = 0; i < names.size(); i++)
    {
        if (isNumber(values[i]))
        {
            res[i] = Rcpp::wrap(lexical_cast<double>(values[i]));
        }
        else if (isBool(values[i]))
        {
            res[i] = Rcpp::wrap(toBool(values[i]));
        }
        else
        {
            res[i] = Rcpp::wrap(values[i]);
        }
    }

    res.attr("names") = names;
    return res;
}

// Describes the search databases listed in the file's inputs.
//
// Returns a data frame with one row per database and the columns
// location, id, name, numDatabaseSequences, numResidues, version.
Rcpp::DataFrame RcppIdent::getDB(  )
{
    const vector<SearchDatabasePtr>& databases = mzid->dataCollection.inputs.searchDatabase;

    std::vector<std::string> locations;
    std::vector<std::string> ids;
    std::vector<std::string> labels;
    std::vector<std::string> versions;
    std::vector<long> sequenceCounts;
    std::vector<long> residueCounts;

    for (vector<SearchDatabasePtr>::const_iterator it = databases.begin(); it != databases.end(); ++it)
    {
        const SearchDatabasePtr& db = *it;

        locations.push_back(db->location);
        ids.push_back(db->id);
        labels.push_back(db->name);
        versions.push_back(db->version);
        sequenceCounts.push_back(db->numDatabaseSequences);
        residueCounts.push_back(db->numResidues);
    }

    // Assemble the named columns first, then turn the list into a data frame.
    Rcpp::List columns = Rcpp::List::create(
                             Rcpp::_["location"]  = locations,
                             Rcpp::_["id"]  = ids,
                             Rcpp::_["name"]  = labels,
                             Rcpp::_["numDatabaseSequences"]  = sequenceCounts,
                             Rcpp::_["numResidues"]  = residueCounts,
                             Rcpp::_["version"]  = versions
                         );
    Rcpp::DataFrame database(columns);

    return database;
}