inst/include/seqan/align/align_base.h
d92ab6f0
 // ==========================================================================
 //                 SeqAn - The Library for Sequence Analysis
 // ==========================================================================
 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
 // All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 //     * Redistributions of source code must retain the above copyright
 //       notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above copyright
 //       notice, this list of conditions and the following disclaimer in the
 //       documentation and/or other materials provided with the distribution.
 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
 //       its contributors may be used to endorse or promote products derived
 //       from this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 // DAMAGE.
 //
 // ==========================================================================
 // Author: Andreas Gogol-Doering <andreas.doering@mdc-berlin.de>
 // ==========================================================================
 // The Align class provides tabular alignments.  It contains multiple
 // Gap objects, one for each row of the alignment.
 // ==========================================================================
 
 #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_
 #define SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_
 
 namespace seqan {
 
 // ============================================================================
 // Forwards
 // ============================================================================
 
 // ============================================================================
 // Tags, Classes, Enums
 // ============================================================================
 
 // ----------------------------------------------------------------------------
 // Class Align
 // ----------------------------------------------------------------------------
 
 /*!
  * @class Align
  * @implements EqualityComparableConcept
  * @headerfile <seqan/align.h>
  * @brief Tabular alignment of same-type sequences.
  *
  * @signature template <typename TSequence, typename TGapSpec>
  *            class Align;
  *
  * @tparam TSequence Type of the underlying sequence.
  * @tparam TGapSpec  Tag for selecting the @link Gaps @endlink specialization.
  *
  * The Align class stores tabular alignments of sequences that all have the same type.  It does so by acting as a
  * container of @link Gaps @endlink objects, one for each row.  The most common use case is storing pairwise
  * alignments that are generated by one of the dynamic programming alignment algorithms.
  *
  * @section Example
  *
  * Here is an example of using an Align object with @link globalAlignment @endlink.
  *
  * @include demos/dox/align/align.cpp
  *
  * The output is as follows:
  *
  * @include demos/dox/align/align.cpp.stdout
  *
  * @see globalAlignment
  * @see localAlignment
  * @see Gaps
  */
 
 // Tabular alignment: a container of Gaps objects, one per row, stored in data_rows.
 //
 // TSource is the type of the underlying sequences, TSpec selects the Gaps specialization used for the rows.
 template <typename TSource, typename TSpec = ArrayGaps>
 class Align
 {
 public:
     // Type of a single row.
     typedef Gaps<TSource, TSpec> TGaps;
     // Type of the container holding all rows.
     typedef String<TGaps> TRows;
     // Size type of the row container.
     typedef typename Size<TRows>::Type TRowsSize;
 
     // The rows of the alignment; rows() and row() hand out references into this container.
     TRows data_rows;
 
     // Creates an alignment without any rows.
     Align() {}
 
     // Member-wise copy.  Declared explicitly because this class has a user-provided copy assignment operator;
     // relying on the implicitly generated copy constructor in that situation is deprecated.
     Align(Align const &) = default;
 
     // Creates an alignment from the strings of stringset by delegating to setStrings().
     template <typename TString, typename TStringsetSpec>
     Align(StringSet<TString, TStringsetSpec> & stringset)
     {
         setStrings(*this, stringset);
     }
 
     // Copies the rows of other into this alignment.
     Align & operator=(Align const & other)
     {
         data_rows = other.data_rows;
         return *this;
     }
 };
 
 // ============================================================================
 // Metafunctions
 // ============================================================================
 
 // ----------------------------------------------------------------------------
 // Metafunction Cols
 // ----------------------------------------------------------------------------
 
 /*!
  * @mfn Align#Cols
  * @brief Return the type representing a column in an Align object.
  *
  * @signature Cols<TAlign>::Type
  *
  * @tparam TAlign The Align object to get the column type for.
  *
  * @return Type   The resulting column type.
  */
 
 // Column type of a non-const Align: AlignCols parameterized with that Align type.
 template <typename TSource, typename TSpec>
 struct Cols<Align<TSource, TSpec>>
 {
     using Type = AlignCols<Align<TSource, TSpec>>;
 };
 
 // Column type of a const Align: AlignCols parameterized with the const Align type.
 template <typename TSource, typename TSpec>
 struct Cols<Align<TSource, TSpec> const>
 {
     using Type = AlignCols<Align<TSource, TSpec> const>;
 };
 
 // ----------------------------------------------------------------------------
 // Metafunction Value
 // ----------------------------------------------------------------------------
 
 /*!
  * @mfn Align#Value
  * @brief Return the value type for an Align object.
  *
  * @signature Value<TAlign>::Type
  *
  * @tparam TAlign The Align object to get the value type for.
  *
  * @return Type   The resulting value type.
  */
 
 // Value of a non-const Align is inherited from Value of its row type (Gaps).
 template <typename TSource, typename TSpec>
 struct Value<Align<TSource, TSpec>> : Value<Gaps<TSource, TSpec>>
 {
 };
 // Value of a const Align is inherited from Value of its const row type (Gaps const).
 template <typename TSource, typename TSpec>
 struct Value<Align<TSource, TSpec> const> : Value<Gaps<TSource, TSpec> const>
 {
 };
 
 // ----------------------------------------------------------------------------
 // Metafunction GetValue
 // ----------------------------------------------------------------------------
 
 /*!
  * @mfn Align#GetValue
  * @brief Return the get-value type for an Align object.
  *
  * @signature GetValue<TAlign>::Type
  *
  * @tparam TAlign The Align object to get the get-value type for.
  *
  * @return Type   The resulting get-value type.
  */
 
 // GetValue of a non-const Align is inherited from GetValue of its row type (Gaps).
 template <typename TSource, typename TSpec>
 struct GetValue<Align<TSource, TSpec>> : GetValue<Gaps<TSource, TSpec>>
 {
 };
 // GetValue of a const Align is inherited from GetValue of its const row type (Gaps const).
 template <typename TSource, typename TSpec>
 struct GetValue<Align<TSource, TSpec> const> : GetValue<Gaps<TSource, TSpec> const>
 {
 };
 
 // ----------------------------------------------------------------------------
 // Metafunction Reference
 // ----------------------------------------------------------------------------
 
 /*!
  * @mfn Align#Reference
  * @brief Return the reference type for an Align object.
  *
  * @signature Reference<TAlign>::Type
  *
  * @tparam TAlign The Align object to get the reference type for.
  *
  * @return Type   The resulting reference type.
  */
 
 // Reference of a non-const Align is inherited from Reference of its row type (Gaps).
 template <typename TSource, typename TSpec>
 struct Reference<Align<TSource, TSpec>> : Reference<Gaps<TSource, TSpec>>
 {
 };
 // Reference of a const Align is inherited from Reference of its const row type (Gaps const).
 template <typename TSource, typename TSpec>
 struct Reference<Align<TSource, TSpec> const> : Reference<Gaps<TSource, TSpec> const>
 {
 };
 
 // ----------------------------------------------------------------------------
 // Metafunction Rows
 // ----------------------------------------------------------------------------
 
 /*!
  * @mfn Align#Row
  * @brief Return the row type (@link Gaps @endlink specialization).
  *
  * @signature Row<TAlign>::Type
  *
  * @tparam TAlign The Align object to get the row type for.
  *
  * @return Type   The resulting row type.
  */
 
 /*!
  * @mfn Align#Rows
  * @brief Return the type of the container holding the rows of an Align object (a String of Gaps).
  *
  * @signature Rows<TAlign>::Type
  *
  * @tparam TAlign The Align object to get the rows type for.
  *
  * @return Type   The resulting rows type.
  */
 
 // Row container type of a non-const Align: a String of Gaps.
 template <typename TSource, typename TSpec>
 struct Rows<Align<TSource, TSpec>>
 {
     using Type = String<Gaps<TSource, TSpec>>;
 };
 // Row container type of a const Align: the same String of Gaps, but const-qualified.
 template <typename TSource, typename TSpec>
 struct Rows<Align<TSource, TSpec> const>
 {
     using Type = String<Gaps<TSource, TSpec>> const;
 };
 
 // ----------------------------------------------------------------------------
 // Metafunction Source
 // ----------------------------------------------------------------------------
 
 /*!
  * @mfn Align#Source
  * @brief Return the type of the underlying sequence.
  *
  * @signature Source<TAlign>::Type
  *
  * @tparam TAlign The Align object to get the underlying sequence type for.
  *
  * @return Type   The resulting sequence type.
  */
 
 // Source of a non-const Align: its first template argument, unchanged.
 template <typename TSource, typename TSpec>
 struct Source<Align<TSource, TSpec>>
 {
     using Type = TSource;
 };
 // Source of a const Align: still the plain (non-const) TSource.
 template <typename TSource, typename TSpec>
 struct Source<Align<TSource, TSpec> const>
 {
     using Type = TSource;
 };
 
 // ----------------------------------------------------------------------------
 // Metafunction StringSetType
 // ----------------------------------------------------------------------------
 
 /*!
  * @mfn Align#StringSetType
  * @brief Return the type that would be used for a string set of the sources.
  *
  * @signature StringSetType<TAlign>::Type
  *
  * @tparam TAlign The Align object to get the string set type for.
  *
  * @return Type   The resulting string set type.
  */
 
 // String set type for a non-const Align: a Dependent StringSet over TSource.
 template <typename TSource, typename TSpec>
 struct StringSetType<Align<TSource, TSpec>>
 {
     using Type = StringSet<TSource, Dependent<> >;
 };
 
 // String set type for a const Align: the same Dependent StringSet over TSource as for the non-const case.
 template <typename TSource, typename TSpec>
 struct StringSetType<Align<TSource, TSpec> const>
 {
     using Type = StringSet<TSource, Dependent<> >;
 };
 
 // ============================================================================
 // Functions
 // ============================================================================
 
 // ----------------------------------------------------------------------------
 // Function move()
 // ----------------------------------------------------------------------------
 
 // Moves source into target by delegating to move() on the two row containers.
 template <typename TSource, typename TSpec>
 inline void
 move(Align<TSource, TSpec> & target,
      Align<TSource, TSpec> & source)
 {
     move(target.data_rows, source.data_rows);
 }
 
 // ----------------------------------------------------------------------------
 // Function rows()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#rows
  * @brief Returns the rows of an Align object.
  *
  * @signature TRows rows(align);
  *
  * @param[in] align The Align object to get the rows for.
  *
  * @return TRows A container with the Gaps of the Align object.
  */
 
 // Mutable access to the row container of me.
 template <typename TSource, typename TSpec>
 inline typename Rows<Align<TSource, TSpec>>::Type &
 rows(Align<TSource, TSpec> & me)
 {
     typedef typename Rows<Align<TSource, TSpec>>::Type TRowContainer;
 
     TRowContainer & container = me.data_rows;
     return container;
 }
 
 // Read-only access to the row container of me.
 template <typename TSource, typename TSpec>
 inline typename Rows<Align<TSource, TSpec> const>::Type &
 rows(Align<TSource, TSpec> const & me)
 {
     typedef typename Rows<Align<TSource, TSpec> const>::Type TRowContainer;
 
     TRowContainer & container = me.data_rows;
     return container;
 }
 
 // ----------------------------------------------------------------------------
 // Function row()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#row
  * @brief Returns a single row of an Align object.
  *
  * @signature TRow row(align, pos);
  *
  * @param[in] align The Align object to get the row of.
  * @param[in] pos   The number of the row to get.
  *
  * @return TRow Reference to the given row of align  (Metafunction: @link Align#Row @endlink).
  */
 
 // Mutable access to the row at index _pos: fetches the row container and picks the entry with value().
 template <typename TSource, typename TSpec, typename TPosition>
 inline typename Row<Align<TSource, TSpec>>::Type &
 row(Align<TSource, TSpec> & me, TPosition _pos)
 {
     typedef typename Rows<Align<TSource, TSpec>>::Type TRowContainer;
 
     TRowContainer & allRows = rows(me);
     return value(allRows, _pos);
 }
 
 // Read-only access to the row at index _pos: fetches the const row container and picks the entry with value().
 template <typename TSource, typename TSpec, typename TPosition>
 inline typename Row<Align<TSource, TSpec> const>::Type &
 row(Align<TSource, TSpec> const & me, TPosition _pos)
 {
     typedef typename Rows<Align<TSource, TSpec> const>::Type TRowContainer;
 
     TRowContainer & allRows = rows(me);
     return value(allRows, _pos);
 }
 
 // ----------------------------------------------------------------------------
 // Function cols()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#cols
  * @brief Returns the columns of an Align object.
  *
  * @signature TCols cols(align);
  *
  * @param[in] align The Align object to get the cols of.
  *
  * @return TCols The columns of the Align object (Metafunction: @link Align#Cols @endlink).
  */
 
 // Builds the column object for a non-const Align; it is constructed from me and returned by value.
 template <typename TSource, typename TSpec>
 inline typename Cols<Align<TSource, TSpec>>::Type
 cols(Align<TSource, TSpec> & me)
 {
     typedef typename Cols<Align<TSource, TSpec>>::Type TColumns;
 
     return TColumns(me);
 }
 
 // Builds the column object for a const Align; it is constructed from me and returned by value.
 template <typename TSource, typename TSpec>
 inline typename Cols<Align<TSource, TSpec> const>::Type
 cols(Align<TSource, TSpec> const & me)
 {
     typedef typename Cols<Align<TSource, TSpec> const>::Type TColumns;
 
     return TColumns(me);
 }
 
 // ----------------------------------------------------------------------------
 // Function col()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#col
  * @brief Returns a single column of an Align object.
  *
  * @signature TCol col(align, pos);
  *
  * @param[in] align The Align object to get the column of.
  * @param[in] pos   The number of the column to get.
  *
  * @return TCol The column of the Align object (Metafunction: @link Align#Col @endlink).
  */
 
 // Returns the column at index _pos of a non-const Align.
 template <typename TSource, typename TSpec, typename TPosition>
 inline typename Col<Align<TSource, TSpec>>::Type
 col(Align<TSource, TSpec> & me, TPosition _pos)
 {
     // cols(me) yields a temporary column object; value() selects the requested column from it.
     return value(cols(me), _pos);
 }
 
 // Returns the column at index _pos of a const Align.
 template <typename TSource, typename TSpec, typename TPosition>
 inline typename Col<Align<TSource, TSpec> const>::Type
 col(Align<TSource, TSpec> const & me, TPosition _pos)
 {
     // cols(me) yields a temporary column object; value() selects the requested column from it.
     return value(cols(me), _pos);
 }
 
 // ----------------------------------------------------------------------------
 // Function detach()
 // ----------------------------------------------------------------------------
 
 // Calls detach() on every row of me, in row order.
 template <typename TSource, typename TSpec>
 inline void
 detach(Align<TSource, TSpec> & me)
 {
     typedef typename Rows<Align<TSource, TSpec>>::Type TRowContainer;
     typedef typename Iterator<TRowContainer, Standard>::Type TRowIter;
 
     for (TRowIter rowIt = begin(rows(me)), rowsEnd = end(rows(me)); rowIt != rowsEnd; ++rowIt)
         detach(*rowIt);
 }
 
 // ----------------------------------------------------------------------------
 // Function write()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#write
  * @deprecated Old-style I/O.
  * @brief Writing of Align objects to streams in human-readable format.
  *
  * @signature void write(stream, align);
  *
  * @param[in,out] stream The Stream to write to.
  * @param[in]     align  The Align object to write out.
  */
 
 template <typename TFile, typename TSource, typename TSpec>
 inline void
 _write(TFile & target,
       Align<TSource, TSpec> const & source)
 {
     typedef Align<TSource, TSpec> const TAlign;
     typedef typename Row<TAlign>::Type TRow;
     typedef typename Position<typename Rows<TAlign>::Type>::Type TRowsPosition;
     typedef typename Position<TAlign>::Type TPosition;
 
     TRowsPosition row_count = length(rows(source));
     TPosition begin_ = 0;
     TPosition end_ = std::min(length(row(source, 0)), length(row(source, 1)));
 
     unsigned int baseCount = 0;
     unsigned int leftSpace = 6;
     while (begin_ < end_)
     {
         unsigned int windowSize_ = 50;
         if ((begin_ + windowSize_) > end_)
             windowSize_ = end_ - begin_;
 
         // Print header line
         char buffer[20];
         int len = snprintf(buffer, 20, "%7u", (unsigned)baseCount);
         write(target, buffer, len);
         baseCount += windowSize_;
         writeValue(target, ' ');
         for (TPosition i = 1; i <= windowSize_; ++i)
         {
             if ((i % 10) == 0)
                 writeValue(target, ':');
             else if ((i % 5) == 0)
                 writeValue(target, '.');
             else
                 writeValue(target, ' ');
         }
         writeValue(target, ' ');
         writeValue(target, '\n');
 
         // Print sequences
         for (TRowsPosition i = 0; i < 2 * row_count - 1; ++i)
         {
             for (unsigned int j = 0; j < leftSpace + 2; ++j)
                 writeValue(target, ' ');
             if ((i % 2) == 0)
             {
                 TRow & row_ = row(source, i / 2);
                 typedef typename Iterator<typename Row<TAlign>::Type const, Standard>::Type TIter;
                 TIter begin1_ = iter(row_, begin_);
                 TIter end1_ = iter(row_, begin_ + windowSize_);
                 for (; begin1_ != end1_; ++begin1_)
                 {
                     if (isGap(begin1_))
                         writeValue(target, gapValue<char>());
                     else
                         writeValue(target, getValue(begin1_));
                 }
             }
             else
             {
                 for (unsigned int j = 0; j < windowSize_; ++j)
                 {
                     if ((!isGap(row(source, (i - 1) / 2), begin_ + j)) &&
                         (!isGap(row(source, (i + 1) / 2), begin_ + j)) &&
                         (row(source, (i - 1) / 2)[begin_ + j] == row(source, (i + 1) / 2)[begin_ + j]))
                     {
                         writeValue(target, '|');
                     }
                     else
                     {
                         writeValue(target, ' ');
                     }
                 }
             }
             writeValue(target, '\n');
         }
         writeValue(target, '\n');
         begin_ += 50;
     }
     writeValue(target, '\n');
 }
 
 // Deprecated entry point kept for old-style I/O; simply forwards to _write().
 template <typename TFile, typename TSource, typename TSpec>
 [[deprecated("Old-style I/O. Use stream operator << instead.")]]
 inline void
 write(TFile & target, Align<TSource, TSpec> const & source)
 {
     _write(target, source);
 }
 
 // ----------------------------------------------------------------------------
 // Function clearClipping()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#clearClipping
  * @brief Clear clipping on all rows.
  *
  * @signature void clearClipping(align);
  *
  * @param[in,out] align Align object to clear clippings of.
  */
 
 // TODO(holtgrew): Undocumented.
 
 // Calls clearClipping() on every row of align_, in row order.
 template <typename TSource, typename TSpec>
 inline void
 clearClipping(Align<TSource, TSpec> & align_)
 {
     typedef typename Rows<Align<TSource, TSpec>>::Type TRowContainer;
     typedef typename Iterator<TRowContainer>::Type TRowIter;
 
     TRowIter rowIt = begin(rows(align_));
     while (rowIt != end(rows(align_)))
     {
         clearClipping(*rowIt);
         goNext(rowIt);
     }
 }
 
 // ----------------------------------------------------------------------------
 // Function operator<<()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#operator<<
  * @brief Stream-output for Align objects.
  *
  * @signature TStream operator<<(stream, align);
  *
  * @param[in,out] stream <tt>std::ostream</tt> to write to.
  * @param[in]     align  Align object to write out.
  *
  * @return TStream Reference to stream after output of align.
  */
 
 // stream operators
 
 template <typename TStream, typename TSource, typename TSpec>
 inline TStream &
 operator<<(TStream & target,
            Align<TSource, TSpec> const & source)
 {
     typename DirectionIterator<TStream, Output>::Type it = directionIterator(target, Output());
     _write(it, source);
     return target;
 }
 
 // ----------------------------------------------------------------------------
 // Function setStrings()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#setStrings
  * @brief Loads the sequences of a string set into an alignment.
  *
  * @signature void setStrings(align, stringSet);
  *
  * @param[in,out] align     Align object to set underlying sequence of.
  * @param[in]     stringSet The @link StringSet @endlink with the data.
  */
 
 // Rebuilds the rows of me from stringset: existing rows are cleared, the row container is resized to the number of
 // strings, and each row gets the string with the same index as its source via setSource().
 template <typename TSource, typename TSpec, typename TSpec2>
 inline void
 setStrings(Align<TSource, TSpec> & me,
            StringSet<TSource, TSpec2> & stringset)
 {
     typedef typename Rows<Align<TSource, TSpec>>::Type TRowContainer;
     typedef typename Iterator<TRowContainer>::Type TRowIter;
     typedef typename Size<StringSet<TSource, TSpec2>>::Type TSetSize;
 
     clear(me.data_rows);
     TSetSize const numStrings = length(stringset);
     resize(me.data_rows, numStrings);
 
     // Walk rows and strings in lock-step.
     TRowIter rowIt = begin(rows(me));
     for (TSetSize idx = 0; idx < numStrings; ++idx, ++rowIt)
         setSource(*rowIt, value(stringset, idx));
 }
 
 // ----------------------------------------------------------------------------
 // Function clearGaps()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#clearGaps
  * @brief Clear gaps of all Align rows.
  *
  * @signature void clearGaps(align);
  *
  * @param[in,out] align The Align object to clear all gaps from.
  */
 
 // Calls clearGaps() on every row of me, in row order.
 template <typename TSource, typename TSpec>
 inline void
 clearGaps(Align<TSource, TSpec> & me)
 {
     typedef typename Rows<Align<TSource, TSpec>>::Type TRowContainer;
     typedef typename Iterator<TRowContainer>::Type TRowIter;
 
     TRowIter rowIt = begin(rows(me));
     while (rowIt != end(rows(me)))
     {
         clearGaps(*rowIt);
         goNext(rowIt);
     }
 }
 
 // ----------------------------------------------------------------------------
 // Function stringSet()
 // ----------------------------------------------------------------------------
 
 /*!
  * @fn Align#stringSet
  * @brief Return string set with all ungapped sequences.
  *
  * @signature TStringSet stringSet(align);
  *
  * @param[in] align Align object to get sequences of.
  *
  * @return TStringSet The set of ungapped sequences (Metafunction: @link Align#StringSetType @endlink).
  */
 
 // Collects source() of every row of me, in row order, into a freshly created string set and returns it by value.
 template <typename TSource, typename TSpec>
 inline typename StringSetType<Align<TSource, TSpec>>::Type
 stringSet(Align<TSource, TSpec> & me)
 {
     typedef Align<TSource, TSpec> TAlign;
     typedef typename Iterator<typename Rows<TAlign>::Type>::Type TRowIter;
 
     typename StringSetType<TAlign>::Type result;
 
     TRowIter rowIt = begin(rows(me));
     while (rowIt != end(rows(me)))
     {
         appendValue(result, source(*rowIt));
         goNext(rowIt);
     }
     return result;
 }
 
 // ----------------------------------------------------------------------------
 // Function operator==()
 // ----------------------------------------------------------------------------
 
 // Two Align objects are equal if they have the same number of rows and every pair of corresponding rows yields the
 // same sequence of values when iterated from begin to end.
 //
 // Both row iterators are checked for their end: rows of different length are reported as unequal, and the
 // shorter row is never iterated past its end.
 template <typename TSource, typename TSpec>
 inline bool operator==(Align<TSource, TSpec> const & lhs,
                        Align<TSource, TSpec> const & rhs)
 {
     typedef Align<TSource, TSpec>                        TAlign;
     typedef typename TAlign::TGaps                       TGaps;
     typedef typename TAlign::TRowsSize                   TRowsSize;
     typedef typename Iterator<TGaps const, Rooted>::Type TIter;
 
     TRowsSize const numRows = length(lhs.data_rows);
     if (numRows != length(rhs.data_rows))
         return false;
 
     for (TRowsSize i = 0; i < numRows; ++i)
     {
         TIter itL = begin(lhs.data_rows[i], Rooted());
         TIter itR = begin(rhs.data_rows[i], Rooted());
         for (; !atEnd(itL) && !atEnd(itR); goNext(itL), goNext(itR))
             if (*itL != *itR)
                 return false;
         // One row ended before the other: the rows differ in length.
         if (!atEnd(itL) || !atEnd(itR))
             return false;
     }
     return true;
 }
 
 // ----------------------------------------------------------------------------
 // Function operator!=()
 // ----------------------------------------------------------------------------
 
 // Inequality is defined as the negation of operator==.
 template <typename TSource, typename TSpec>
 inline bool operator!=(Align<TSource, TSpec> const & lhs,
                        Align<TSource, TSpec> const & rhs)
 {
     bool const equal = (lhs == rhs);
     return !equal;
 }
 
 }  // namespace seqan
 
 #endif  // #ifndef SEQAN_INCLUDE_SEQAN_ALIGN_ALIGN_BASE_H_