// ========================================================================== // SeqAn - The Library for Sequence Analysis // ========================================================================== // Copyright (c) 2006-2018, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of Knut Reinert or the FU Berlin nor the names of // its contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH // DAMAGE. // // ========================================================================== // Author: Rene Rahn // ========================================================================== // Jst traversal buffer used to construct sequence contents. // ========================================================================== #ifndef EXTRAS_INCLUDE_SEQAN_JOURNALED_STRING_TREE_JOURNALED_SEQUENCE_BUFFER_H_ #define EXTRAS_INCLUDE_SEQAN_JOURNALED_STRING_TREE_JOURNALED_SEQUENCE_BUFFER_H_ namespace seqan { // ============================================================================ // Forwards // ============================================================================ // ============================================================================ // Tags, Classes, Enums // ============================================================================ // ---------------------------------------------------------------------------- // Tag Member Tags // ---------------------------------------------------------------------------- struct JstBufferSetMember_; typedef Tag JstBufferSetMember; struct JstBufferDeltaMapMember_; typedef Tag JstBufferDeltaMapMember; // ---------------------------------------------------------------------------- // Class JstBuffer_ // ---------------------------------------------------------------------------- template class JstBuffer_ { public: // __ Types ___________________________________________________________________ typedef typename Member::Type TJournaledSet; typedef typename Member::Type TSource; typedef typename Iterator::Type TSourceIterator; typedef typename Member::Type TDeltaMap; typedef typename Iterator::Type TDeltaIterator; typedef typename Size::Type TSourceSize; typedef String TStringPositions; // __ Members _________________________________________________________________ bool _isSynchronized; TDeltaMap* _deltaMapPtr; TSourceIterator _sourceBegin; // Iterator to the clipped begin position of the source. TSourceIterator _sourceEnd; // Iterator to the clipped end position of the source. TDeltaIterator _deltaRangeBegin; TDeltaIterator _deltaRangeEnd; TStringPositions _startPositions; // The begin positions of the strings within the chunk. TJournaledSet _journaledSet; // The actual sequences over the current chunk. // __ Constructor _____________________________________________________________ JstBuffer_() {} JstBuffer_(TDeltaMap & map) : _deltaMapPtr(&map) {} JstBuffer_(TDeltaMap & map, TSourceIterator const & srcBeg, TSourceIterator const & srcEnd) : _deltaMapPtr(&map), _sourceBegin(srcBeg), _sourceEnd(srcEnd) { sync(*this); } }; // ============================================================================ // Metafunctions // ============================================================================ // ---------------------------------------------------------------------------- // Metafunction Size // ---------------------------------------------------------------------------- template struct Size > { typedef typename Source::Type TSource_; typedef typename Size::Type Type; }; // ---------------------------------------------------------------------------- // Metafunction Member [JstSeqBufferJournaledSet] // ---------------------------------------------------------------------------- template struct Member, JstBufferSetMember> { typedef typename Member::Type TSource_; typedef StringSet > Type; }; // ---------------------------------------------------------------------------- // Metafunction Member [JstBufferDeltaMapMember] // ---------------------------------------------------------------------------- template struct Member, JstBufferDeltaMapMember> { typedef typename Member::Type Type; }; template struct Member const, JstBufferDeltaMapMember> { typedef typename Member::Type const Type; }; // ============================================================================ // Private Functions // ============================================================================ namespace impl { // ---------------------------------------------------------------------------- // Function impl::journalDelta() [DeltaTypeSnp] // ---------------------------------------------------------------------------- template inline void journalDelta(TTarget & target, TPos refPos, TSnp const & snp, DeltaTypeSnp const & /*tag*/) { typedef typename JournalType::Type TJournalEntries; typedef typename Iterator::Type TEntriesIterator; typedef typename Position::Type TEntryPos; TEntriesIterator entryIt = end(_journalEntries(target), Standard()) -1; SEQAN_ASSERT_EQ(entryIt->segmentSource, SOURCE_ORIGINAL); SEQAN_ASSERT_GEQ(refPos, entryIt->physicalOriginPosition); SEQAN_ASSERT_GT(entryIt->physicalOriginPosition + entryIt->length, refPos); TEntryPos virtPos = entryIt->virtualPosition + (refPos - entryIt->physicalOriginPosition); appendValue(target._insertionBuffer, snp); _doRecordInsertion(target._journalEntries, entryIt, virtPos, length(target._insertionBuffer) - 1, 1u); entryIt = end(_journalEntries(target), Standard()) -1; ++virtPos; _doRecordErase(target._journalEntries, entryIt, virtPos, virtPos + 1); } // ---------------------------------------------------------------------------- // Function impl::journalDelta() [DeltaTypeDel] // ---------------------------------------------------------------------------- template inline void journalDelta(TTarget & target, TPos refPos, TSize const & delLength, DeltaTypeDel const & /*tag*/) { typedef typename JournalType::Type TJournalEntries; typedef typename Iterator::Type TEntryIterator; typedef typename Position::Type TEntryPos; TEntryIterator entryIt = end(target._journalEntries, Standard()) - 1; SEQAN_ASSERT_EQ(entryIt->segmentSource, SOURCE_ORIGINAL); SEQAN_ASSERT_GEQ(refPos, entryIt->physicalOriginPosition); SEQAN_ASSERT_GT(entryIt->physicalOriginPosition + entryIt->length, refPos); target._length -= delLength; TEntryPos virtPos = entryIt->virtualPosition + (refPos - entryIt->physicalOriginPosition); _doRecordErase(target._journalEntries, entryIt, virtPos, virtPos + delLength); if (length(target._journalEntries) == 0) clear(target._insertionBuffer); } // ---------------------------------------------------------------------------- // Function impl::journalDelta() [DeltaTypeIns] // ---------------------------------------------------------------------------- template inline void journalDelta(TTarget & target, TPos refPos, TInsertion const & insSeq, DeltaTypeIns const & /*tag*/) { typedef typename JournalType::Type TJournalEntries; typedef typename Iterator::Type TEntryIterator; typedef typename Position::Type TEntryPos; TEntryIterator entryIt = end(target._journalEntries, Standard()) - 1; SEQAN_ASSERT_EQ(entryIt->segmentSource, SOURCE_ORIGINAL); SEQAN_ASSERT_GEQ(refPos, entryIt->physicalOriginPosition); SEQAN_ASSERT_GEQ(entryIt->physicalOriginPosition + entryIt->length, refPos); // Special case for the insertion where it is valid to insert behind the sequence. target._length += length(insSeq); TEntryPos virtPos = entryIt->virtualPosition + (refPos - entryIt->physicalOriginPosition); TEntryPos physPos = length(target._insertionBuffer); append(target._insertionBuffer, insSeq); _doRecordInsertion(target._journalEntries, entryIt, virtPos, physPos, length(insSeq)); } // ---------------------------------------------------------------------------- // Function impl::journalDelta() [DeltaTypeSV] // ---------------------------------------------------------------------------- template inline void journalDelta(TTarget & target, TPos refPos, TSV const & sv, DeltaTypeSV const & /*tag*/) { typedef typename JournalType::Type TJournalEntries; typedef typename Iterator::Type TEntryIterator; typedef typename Position::Type TEntryPos; TEntryIterator entryIt = end(target._journalEntries, Standard()) - 1; SEQAN_ASSERT_EQ(entryIt->segmentSource, SOURCE_ORIGINAL); SEQAN_ASSERT_GEQ(refPos, entryIt->physicalOriginPosition); SEQAN_ASSERT_GT(entryIt->physicalOriginPosition + entryIt->length, refPos); target._length -= sv.i1; TEntryPos virtPos = entryIt->virtualPosition + (refPos - entryIt->physicalOriginPosition); _doRecordErase(target._journalEntries, entryIt, virtPos, virtPos + sv.i1); entryIt = end(target._journalEntries, Standard()) - 1; target._length += length(sv.i2); TEntryPos physPos = length(target._insertionBuffer); append(target._insertionBuffer, sv.i2); _doRecordInsertion(target._journalEntries, entryIt, virtPos, physPos, length(sv.i2)); } // ---------------------------------------------------------------------------- // Function impl::create() // ---------------------------------------------------------------------------- template struct JournaledStringCreateFunctor { TMapIter mapIt; TJSIter setIt; template inline void operator()(TTag const & /*deltaType*/) { if ((*mapIt).deltaTypeEnd != DeltaEndType::IS_RIGHT) impl::journalDelta(*setIt, getDeltaPosition(*mapIt), deltaValue(mapIt, TTag()), TTag()); } }; template inline void create(JstBuffer_ & buffer) { typedef typename Member, JstBufferSetMember>::Type TJSet; typedef typename Value::Type TJString; typedef typename Iterator::Type TJSetIter; typedef typename Member, JstBufferDeltaMapMember>::Type TDeltaMap; typedef typename Iterator::Type TDeltaMapIter; SEQAN_ASSERT(!empty(buffer._startPositions)); clear(buffer._journaledSet); // Guarentee empty set. // Initialize the journaled set. resize(buffer._journaledSet, length(buffer._startPositions), Exact()); forEach(buffer._journaledSet, [&buffer](TJString &jStr) { setHost(jStr, host(container(buffer._sourceBegin))); }, Parallel()); // Construct the journaled string context Splitter jSetSplitter(begin(buffer._journaledSet, Standard()), end(buffer._journaledSet, Standard()), Parallel()); SEQAN_OMP_PRAGMA(parallel for) for (auto jobId = 0; jobId < static_cast(length(jSetSplitter)); ++jobId) { impl::JournaledStringCreateFunctor f; f.mapIt = buffer._deltaRangeBegin; for (; f.mapIt != buffer._deltaRangeEnd; ++f.mapIt) { f.setIt = jSetSplitter[jobId]; if (SEQAN_UNLIKELY((*f.mapIt).deltaTypeEnd == DeltaEndType::IS_RIGHT)) continue; auto covIt = begin(getDeltaCoverage(*f.mapIt), Standard()) + (f.setIt - begin(buffer._journaledSet, Standard())); for (; f.setIt != jSetSplitter[jobId + 1]; ++f.setIt, ++covIt) { DeltaTypeSelector deltaSelector; if (*covIt) // If the current sequence covers the current delta. applyOnDelta(f, getDeltaType(*f.mapIt), deltaSelector); } } } } // ---------------------------------------------------------------------------- // Function impl::synchronize() // ---------------------------------------------------------------------------- template inline bool synchronize(JstBuffer_ & buffer) { typedef typename Size >::Type TSize; SEQAN_ASSERT(buffer._sourceBegin < buffer._sourceEnd); // TODO(rrahn): Implement block wise computation. // Discover overlapping deletions at begin and end. // 0123456789012345678901234... // xxxxx------------xxxxxxxx... // [------xxxxxxxx...[ // Beginning of the buffer lies within a deletion of a sequence. // // When detecting the deletion check if end point lies behind source begin position. // If false: ignore deletion. // If true: Add deletion point to the mapExtension -> Maybe we can even add it as a new branching point. // In this case we add a deletion right from the beginning. // We just communicate through this object anyway. -> Then the algorithm does not have to change. // If this is at the end: we cut the deletion -> But we have one at the same position which is just longer. // But you would cut after the deletion anyway. // We could only represent infixes over the journaled strings after they have been constructed? // So we have a fixed end position and begin position. buffer._deltaRangeBegin = begin(*buffer._deltaMapPtr, Standard()); buffer._deltaRangeEnd = end(*buffer._deltaMapPtr, Standard()); // TODO(rrahn): Enable this when allowing chunking. // buffer._deltaRangeBegin = std::lower_bound(begin(buffer._deltaMap, Standard()), // end(buffer._deltaMap, Standard()), // position(buffer._sourceBegin), DeltaExtensionCompareLessPos_()); // buffer._deltaRangeEnd = std::lower_bound(begin(buffer._deltaMap, Standard()), end(buffer._deltaMap, Standard()), // position(buffer._sourceEnd), DeltaExtensionCompareLessPos_()); // Stream from the beginning to the expected range to get the begin positions of the current segment. auto mapIt = begin(*buffer._deltaMapPtr, Standard()); resize(buffer._startPositions, length(getDeltaCoverage(*mapIt)), position(buffer._sourceBegin), Exact()); if (position(buffer._sourceBegin) == 0) // Special case: We also set the begin position to 0 even if there is an insertion at the 0th position. return true; for (; mapIt != buffer._deltaRangeBegin; ++mapIt) { auto net = netSize(mapIt); auto covBegin = begin(getDeltaCoverage(*mapIt), Standard()); for (auto covIt = covBegin; covIt != end(getDeltaCoverage(*mapIt), Standard()); ++covIt) { if (*covIt) { if (SEQAN_UNLIKELY(static_cast(std::abs(net)) > buffer._startPositions[covIt - covBegin] && (net < 0))) buffer._startPositions[covIt - covBegin] = 0; // In case the entire prefix of this sequence is deleted. else buffer._startPositions[covIt - covBegin] += net; } } } buffer._isSynchronized = true; return true; } } // namespace impl // ============================================================================ // Public Functions // ============================================================================ // ---------------------------------------------------------------------------- // Function sourceBegin() // ---------------------------------------------------------------------------- template inline typename JstBuffer_::TSourceIterator sourceBegin(JstBuffer_ & buffer) { return buffer._sourceBegin; } // ---------------------------------------------------------------------------- // Function setSourceBegin() // ---------------------------------------------------------------------------- template inline void setSourceBegin(JstBuffer_ & buffer, TSourceIter const & begin) { buffer._isSynchronized = false; buffer._sourceBegin = begin; } // ---------------------------------------------------------------------------- // Function sourceEnd() // ---------------------------------------------------------------------------- template inline typename JstBuffer_::TSourceIterator sourceEnd(JstBuffer_ & buffer) { return buffer._sourceEnd; } // ---------------------------------------------------------------------------- // Function setSourceEnd() // ---------------------------------------------------------------------------- template inline void setSourceEnd(JstBuffer_ & buffer, TSourceIter const & end) { buffer._isSynchronized = false; buffer._sourceEnd = end; } // ---------------------------------------------------------------------------- // Function setDeltaMap() // ---------------------------------------------------------------------------- template inline void setDeltaMap(JstBuffer_ & buffer, typename Member, JstBufferDeltaMapMember>::Type & map) { if (buffer._deltaMapPtr != &map) { markModified(buffer); buffer._deltaMapPtr = ↦ } } // ---------------------------------------------------------------------------- // Function sync() // ---------------------------------------------------------------------------- template inline bool sync(JstBuffer_ & buffer) { if (isSynchronized(buffer)) return true; return impl::synchronize(buffer); } // ---------------------------------------------------------------------------- // Function create() // ---------------------------------------------------------------------------- template inline bool create(JstBuffer_ & buffer) { if (!isSynchronized(buffer)) if (!sync(buffer)) return false; impl::create(buffer); return true; } // ---------------------------------------------------------------------------- // Function isSynchronized() // ---------------------------------------------------------------------------- template inline bool isSynchronized(JstBuffer_ const & buffer) { return buffer._isSynchronized; } // ---------------------------------------------------------------------------- // Function clear() // ---------------------------------------------------------------------------- template inline void clear(JstBuffer_ & buffer) { buffer._isSynchronized = false; buffer._deltaMapPtr = nullptr; clear(buffer._startPositions); } // ---------------------------------------------------------------------------- // Function markModified() // ---------------------------------------------------------------------------- template inline void markModified(JstBuffer_ & buffer) { buffer._isSynchronized = false; } // ---------------------------------------------------------------------------- // Function init(); // ---------------------------------------------------------------------------- // TODO(rrahn): Fix constness issue. template inline void init(JstBuffer_ & me, TJst & jst) { setDeltaMap(me, impl::member(jst, JstDeltaMapMember())); setSourceBegin(me, begin(impl::member(jst, JstSourceMember()), Standard())); setSourceEnd(me, end(impl::member(jst, JstSourceMember()), Standard())); sync(me); create(me); } } // namespace seqan #endif // EXTRAS_INCLUDE_SEQAN_JOURNALED_STRING_TREE_JOURNALED_SEQUENCE_BUFFER_H_