usr/html/to__simd_8hpp_source.html

 // -----------------------------------------------------------------------------------------------------

 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin

 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik

 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License

 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md

 // -----------------------------------------------------------------------------------------------------


 #pragma once


 #include <seqan3/std/algorithm>

 #include <seqan3/std/iterator>

 #include <seqan3/std/ranges>


 #include <seqan3/core/detail/pack_algorithm.hpp>

 #include <seqan3/core/detail/template_inspection.hpp>

 #include <seqan3/core/range/type_traits.hpp>

 #include <seqan3/range/views/type_reduce.hpp>

 #include <seqan3/utility/simd/algorithm.hpp>

 #include <seqan3/utility/simd/concept.hpp>

 #include <seqan3/utility/simd/simd_traits.hpp>

 #include <seqan3/utility/simd/simd.hpp>

 #include <seqan3/utility/views/zip.hpp>


 namespace seqan3::detail

 {


 // View over a batch of sequences (a range of ranges over a semialphabet) that exposes the batch as chunks of
 // simd vectors.
 //
 // urng_t  The underlying view; its elements are the individual sequences.
 // simd_t  The simd vector type that is produced. At most `simd_traits<simd_t>::length` sequences may be contained
 //         in the underlying range; otherwise the constructor throws std::invalid_argument.
 //
 // The view is single-pass: only the non-const `begin()`/`end()` exist, and the iterator writes the converted
 // chunks into `cached_simd_chunks`, which is owned by the view.
 template <std::ranges::view urng_t, simd::simd_concept simd_t>
 class view_to_simd : public std::ranges::view_interface<view_to_simd<urng_t, simd_t>>
 {
 private:
     static_assert(std::ranges::forward_range<urng_t>,
                   "The underlying range must model forward_range.");
     static_assert(std::ranges::input_range<std::ranges::range_value_t<urng_t>>,
                   "Expects the value type of the underlying range to be an input_range.");
     static_assert(std::default_initializable<std::ranges::range_value_t<urng_t>>,
                   "Expects the inner range to be default constructible.");
     static_assert(semialphabet<std::ranges::range_value_t<std::ranges::range_value_t<urng_t>>>,
                   "Expects semi-alphabet as value type of the inner range.");

     // Type of a single sequence of the batch.
     using inner_range_type = std::ranges::range_value_t<urng_t>;
     // One chunk: `simd_traits<simd_t>::length` many simd vectors.
     using chunk_type = std::array<simd_t, simd_traits<simd_t>::length>;
     // Scalar type stored in one lane of `simd_t`.
     using scalar_type = typename simd_traits<simd_t>::scalar_type;
     // Byte-wise simd type with `simd_traits<simd_t>::max_length` lanes; used by the iterator for bulk loads.
     using max_simd_type = simd_type_t<uint8_t, simd_traits<simd_t>::max_length>;

     // The bulk-load path is only available if the sequences are contiguous in memory, their length can be
     // computed from iterator and sentinel, and one symbol rank occupies exactly one byte.
     static constexpr bool fast_load = std::ranges::contiguous_range<inner_range_type> &&
                                       std::sized_sentinel_for<std::ranges::iterator_t<inner_range_type>,
                                                               std::ranges::sentinel_t<inner_range_type>> &&
                                       sizeof(alphabet_rank_t<std::ranges::range_value_t<inner_range_type>>) == 1;

     // Number of simd vectors per chunk (equals the number of lanes of `simd_t`).
     static constexpr uint8_t chunk_size = simd_traits<simd_t>::length;
     // Number of chunks that fit into one row of `max_simd_type`.
     static constexpr uint8_t chunks_per_load = simd_traits<simd_t>::max_length / chunk_size;
     // Number of chunks cached at once: a square of chunks for the bulk-load path, a single chunk otherwise.
     static constexpr uint8_t total_chunks = fast_load ? (chunks_per_load * chunks_per_load) : 1;
     // Default padding value. The trait must be namespace-qualified: inside this initializer the unqualified name
     // `alphabet_size` already denotes the member that is being declared.
     static constexpr auto alphabet_size = seqan3::alphabet_size<std::ranges::range_value_t<inner_range_type>>;

     // Forward declare class' iterator type. See definition below.
     class iterator_type;

 public:
     constexpr view_to_simd() = default;
     constexpr view_to_simd(view_to_simd const &) = default;
     constexpr view_to_simd(view_to_simd &&) = default;
     constexpr view_to_simd & operator=(view_to_simd const &) = default;
     constexpr view_to_simd & operator=(view_to_simd &&) = default;
     ~view_to_simd() = default;

     // Constructs the view from the underlying view.
     //
     // urng           The batch of sequences; stored inside this view.
     // padding_value  Value written into lanes whose sequence has already ended; defaults to the alphabet size.
     //
     // Throws std::invalid_argument if the batch contains more than `chunk_size` sequences.
     constexpr view_to_simd(urng_t urng, scalar_type const padding_value = alphabet_size) :
         urng{std::move(urng)},
         padding_simd_vector{simd::fill<simd_t>(padding_value)},
         padding_value{padding_value}
     {
         // The parameter `urng` was moved from in the initializer list above and shadows the member, so the
         // stored member has to be inspected explicitly.
         if (std::ranges::distance(this->urng) > chunk_size)
             throw std::invalid_argument{"The size of the underlying range must be less than or equal to the size of "
                                         "the given simd type!"};
     }

     // Constructs the view from any viewable range that is neither this view type nor `urng_t` itself, by first
     // passing it through views::type_reduce and then delegating to the constructor above.
     template <typename other_urng_t>
     requires (!std::same_as<std::remove_cvref_t<other_urng_t>, view_to_simd>) &&
              (!std::same_as<other_urng_t, urng_t>) &&
              std::ranges::viewable_range<other_urng_t>
     constexpr view_to_simd(other_urng_t && urng, scalar_type const padding_value = alphabet_size) :
         view_to_simd{views::type_reduce(std::forward<other_urng_t>(urng)), padding_value}
     {}

     // Returns an iterator bound to this view; constructing it already converts the first chunk(s).
     constexpr iterator_type begin() noexcept
     {
         return {*this};
     }

     // Const iteration is disabled because iterating writes into the cache held by this view.
     constexpr void begin() const noexcept = delete;

     // Returns the sentinel; the iterator itself keeps track of whether the end was reached.
     constexpr std::default_sentinel_t end() noexcept
     {
         return std::default_sentinel;
     }

     // Const iteration is disabled, see begin() const.
     constexpr void end() const noexcept = delete;

     // Returns true if every sequence of the batch is empty (which includes a batch without any sequence).
     constexpr bool empty() const noexcept
         requires std::ranges::forward_range<inner_range_type>
     {
         return std::ranges::all_of(urng, [] (auto & rng)
         {
             return std::ranges::empty(rng);
         });
     }

     // Returns the number of chunks needed to cover the longest sequence, i.e. its size divided by `chunk_size`
     // rounded up; 0 if the batch contains no sequence.
     constexpr size_t size() const noexcept
         requires std::ranges::sized_range<inner_range_type>
     {
         auto it = std::ranges::max_element(urng, [] (auto & lhs, auto & rhs)
         {
             return std::ranges::size(lhs) < std::ranges::size(rhs);
         });

         return (it != std::ranges::end(urng)) ? (std::ranges::size(*it) + chunk_size - 1) / chunk_size : 0;
     }

 private:
     // The underlying batch of sequences.
     urng_t urng{};
     // Storage for the converted chunks; filled by the iterator and referenced by the spans it returns.
     std::array<chunk_type, total_chunks> cached_simd_chunks{};
     // A simd vector with every lane set to the padding value.
     simd_t padding_simd_vector{};
     // The scalar padding value.
     scalar_type padding_value{};
 };


 // Iterator of view_to_simd (tagged as input iterator).
 // It keeps one iterator/sentinel pair per sequence slot, converts the next symbols of all sequences into simd
 // vectors stored in the owning view's `cached_simd_chunks`, and hands out spans over these cached chunks.
 template <std::ranges::view urng_t, simd::simd_concept simd_t>

 class view_to_simd<urng_t, simd_t>::iterator_type

 {

 public:

     // Dereferencing yields a span of simd vectors (`chunk_type` is a std::array of `simd_t`).
     using reference = std::span<std::ranges::range_value_t<chunk_type>>;

     using value_type = reference;

     using pointer = void;

     using difference_type = ptrdiff_t;

     using iterator_category = std::input_iterator_tag;

     using iterator_concept = iterator_category;


     constexpr iterator_type()                                  = default;

     constexpr iterator_type(iterator_type const &)             = default;

     constexpr iterator_type(iterator_type &&)                  = default;

     constexpr iterator_type & operator=(iterator_type const &) = default;

     constexpr iterator_type & operator=(iterator_type &&)      = default;

     ~iterator_type()                                           = default;


     // Binds the iterator to `this_view`, caches begin/end of every sequence and converts the first chunk(s).
     // Not `explicit`: view_to_simd::begin() constructs the iterator via `return {*this};`.
     constexpr iterator_type(view_to_simd & this_view) : this_view{&this_view}, current_chunk_pos{0}

     {

         // Initialise the iterator of the sub ranges.

         size_t seq_id = 0;

         for (auto it = std::ranges::begin(this_view.urng); it != std::ranges::end(this_view.urng); ++it, ++seq_id)

         {

             cached_iter[seq_id] = std::ranges::begin(*it);

             cached_sentinel[seq_id] = std::ranges::end(*it);

         }


         // The batch is empty and by default the constructed iterator is pointing to the end.

         if (seq_id == 0)

             return;


         // The batch is not empty but might not be full either.

         // If a slot is supposed to be empty, it will be initialised with the iterator of the first sequence set to the

         // end emulating an empty sequence.

         auto sentinel_it = std::ranges::next(cached_iter[0], cached_sentinel[0]);

         for (; seq_id < chunk_size; ++seq_id)

         {

             cached_iter[seq_id] = sentinel_it;

             cached_sentinel[seq_id] = cached_sentinel[0];

         }


         // Check if this is the final chunk already.

         final_chunk = all_iterators_reached_sentinel();


         // Fetch the next available input characters from the sequences and transform them into simd vectors.

         underflow();

     }


     // Returns a span over the cached chunk at `current_chunk_pos`.
     // The span has `chunk_size` elements, except for the chunk at `final_chunk_pos`, which has
     // `final_chunk_size` elements (both members keep their "full chunk" defaults until the last load).
     constexpr reference operator*() const noexcept

     {

         assert(this_view != nullptr);

         return std::span{this_view->cached_simd_chunks[current_chunk_pos].begin(),

                          (current_chunk_pos == final_chunk_pos) ? final_chunk_size : chunk_size};

     }


     // Advances to the next chunk; loads new data via underflow() whenever the cached chunks are used up.
     constexpr iterator_type & operator++(/*pre-increment*/)

     {

         if constexpr (fast_load)

         { // Check if cached chunks have been already consumed and we need to fetch the next chunks.

             if (current_chunk_pos == final_chunk_pos)

             {

                 underflow();

                 current_chunk_pos = 0;

             }

             else

             {

                 ++current_chunk_pos;

             }

         }

         else // In case fast load is not available only one chunk is filled at a time.

         {

             underflow();

         }


         return *this;

     }


     // Advances the iterator and returns the span obtained before advancing.
     // NOTE(review): the returned span still refers to the view's cache, which underflow() may overwrite during
     // the increment - confirm whether callers rely on the previous content.
     constexpr value_type operator++(int /*post-increment*/)

     {

         value_type tmp = this->operator*();

         ++(*this);

         return tmp;

     }


     // The comparisons with the default sentinel only inspect the `at_end` flag.
     constexpr bool operator==(std::default_sentinel_t const &) const noexcept

     {

         return at_end;

     }


     friend constexpr bool operator==(std::default_sentinel_t const &, iterator_type const & rhs) noexcept

     {

         return rhs.at_end;

     }


     constexpr bool operator!=(std::default_sentinel_t const &) const noexcept

     {

         return !at_end;

     }


     friend constexpr bool operator!=(std::default_sentinel_t const &, iterator_type const & rhs) noexcept

     {

         return !rhs.at_end;

     }


 private:

     // Splits one byte-wise row into 1, 2, 4 or 8 vectors of type `simd_t`, depending on how `chunk_size`
     // relates to the length of `max_simd_type`, and returns them as a std::array.
     auto unpack(max_simd_type const & row) const

     {

         if constexpr (chunk_size == simd_traits<max_simd_type>::length / 2)  // upcast into 2 vectors.

         {

             return std::array{simd::upcast<simd_t>(extract_half<0>(row)),  // 1. half

                               simd::upcast<simd_t>(extract_half<1>(row))}; // 2. half

         }

         else if constexpr (chunk_size == simd_traits<max_simd_type>::length / 4) // upcast into 4 vectors.

         {

             return std::array{simd::upcast<simd_t>(extract_quarter<0>(row)),  // 1. quarter

                               simd::upcast<simd_t>(extract_quarter<1>(row)),  // 2. quarter

                               simd::upcast<simd_t>(extract_quarter<2>(row)),  // 3. quarter

                               simd::upcast<simd_t>(extract_quarter<3>(row))}; // 4. quarter

         }

         else if constexpr (chunk_size == simd_traits<max_simd_type>::length / 8) // upcast into 8 vectors.

         {

             return std::array{simd::upcast<simd_t>(extract_eighth<0>(row)),   // 1. eighth

                               simd::upcast<simd_t>(extract_eighth<1>(row)),   // 2. eighth

                               simd::upcast<simd_t>(extract_eighth<2>(row)),   // 3. eighth

                               simd::upcast<simd_t>(extract_eighth<3>(row)),   // 4. eighth

                               simd::upcast<simd_t>(extract_eighth<4>(row)),   // 5. eighth

                               simd::upcast<simd_t>(extract_eighth<5>(row)),   // 6. eighth

                               simd::upcast<simd_t>(extract_eighth<6>(row)),   // 7. eighth

                               simd::upcast<simd_t>(extract_eighth<7>(row))};  // 8. eighth

         }

         else

         {

             return std::array{simd::upcast<simd_t>(row)};

         }

     }


     // Distributes the rows of the (already transposed, see underflow()) byte matrix over the view's cached
     // chunks. Writes to the owning view although the function itself is const.
     constexpr void split_into_sub_matrices(std::array<max_simd_type, simd_traits<max_simd_type>::length> matrix) const

     {

         // underflow() fills positions past the end of a sequence with `~0`; here those entries are replaced by
         // the padding vector of the view.
         // NOTE(review): presumably the comparison and the conditional act lane-wise on the simd vector - confirm.
         auto apply_padding = [this] (simd_t const vec)

         {

             return (vec == simd::fill<simd_t>(static_cast<uint8_t>(~0))) ? this_view->padding_simd_vector : vec;

         };


         // Iterate over the rows of the matrix

         for (uint8_t row = 0; row < static_cast<uint8_t>(matrix.size()); ++row)

         {

             // split a row into multiple chunks of size `chunk_size`

             auto chunked_row = unpack(matrix[row]);


             if constexpr (chunked_row.size() == 1)

             {

                 this_view->cached_simd_chunks[0][row] = apply_padding(std::move(chunked_row[0]));

             }

             else // Parse the tuple elements and store them in the cached simd chunks.

             {

                 static_assert(chunked_row.size() == chunks_per_load, "Expected chunks_per_load many simd vectors.");


                 for (uint8_t chunk = 0; chunk < chunks_per_load; ++chunk)  // store chunks in respective cached entries.

                 {

                     // Part `chunk` of matrix row `row` becomes entry `row % chunk_size` of the cached chunk
                     // with index `chunk * chunks_per_load + row / chunk_size`.
                     size_t idx = chunk * chunks_per_load + row / chunk_size;

                     this_view->cached_simd_chunks[idx][row % chunk_size] = apply_padding(std::move(chunked_row[chunk]));

                 }

             }

         }

     }


     // Returns true if every cached iterator compares equal to its cached sentinel.
     constexpr bool all_iterators_reached_sentinel() const noexcept

     {

         using std::get;


         return std::ranges::all_of(views::zip(cached_iter, cached_sentinel), [] (auto && iterator_sentinel_pair)

         {

             return get<0>(iterator_sentinel_pair) == get<1>(iterator_sentinel_pair);

         });

     }


     // Builds one simd vector from the current symbol of every sequence slot and advances the respective
     // iterators; slots whose sequence has ended receive the view's padding value instead.
     constexpr simd_t convert_single_column()

         noexcept

     {

         simd_t simd_column{};

         for (size_t idx = 0u; idx < chunk_size; ++idx)

         {

             if (cached_iter[idx] == cached_sentinel[idx])

             {

                 simd_column[idx] = this_view->padding_value;

             }

             else

             {

                 simd_column[idx] = static_cast<scalar_type>(seqan3::to_rank(*cached_iter[idx]));

                 ++cached_iter[idx];

             }

         };

         return simd_column;

     }


     // Computes `final_chunk_pos` and `final_chunk_size` from the longest remaining distance between the given
     // iterators (their state before the last load) and the cached sentinels.
     // Precondition (asserted): at least one symbol and at most `total_chunks * chunk_size` symbols remained.
     template <typename array_t>

     constexpr void update_final_chunk_position(array_t const & iterators_before_update) noexcept

     {

         size_t max_distance = 0;

         for (auto && [it, sent] : views::zip(iterators_before_update, cached_sentinel))

             max_distance = std::max<size_t>(std::ranges::distance(it, sent), max_distance);


         assert(max_distance > 0);

         assert(max_distance <= (total_chunks * chunk_size));


         // Switch to the zero-based position of the last symbol.
         --max_distance;

         // Index of the cached chunk that contains the last symbol.
         final_chunk_pos = max_distance  / chunk_size;

         // Number of simd vectors of that chunk which are exposed by operator*.

         final_chunk_size = (max_distance % chunk_size) + 1;

     }


     // Bulk-load variant: fills all cached chunks at once by loading `max_length` bytes per sequence and load,
     // transposing the resulting byte matrix and splitting it into the cached chunks.
     // Sets `at_end` if the previous load was already the final one.
     constexpr void underflow()

         requires fast_load

     {

         at_end = final_chunk;

         if (at_end)  // reached end of stream.

             return;

         // For the efficient load we assume at most one byte sized alphabets.

         // Hence we can load `simd_traits<simd_t>::max_length` length many elements at once.

         // Depending on the packing of `simd_t` we can prefetch blocks and store them in the `cached_simd_chunks`.

         // E.g. assume `simd_t` with length 8 on SSE4 with max length 16.

         // To fill the 16x16 matrix we need four 8x8 matrices.

         // Thus, for the 8 sequences we need to load two times 16 consecutive bytes to fill the matrix, i.e. two loads

         // see figure below.

         //

         //       0    1  ...    7 |   8    9  ...   15

         // 0  [a00, a01, ..., a07]|[a08, a09, ..., a15]  // first load of seq a reads 16 characters

         // 1  [b00, b01, ..., b07]|[b08, b09, ..., b15]  // first load of seq b reads 16 characters

         //            ...         |        ...

         // 7  [g00, g01, ..., g07]|[g08, g09, ..., g15]  // first load of seq g reads 16 characters

         //    ----------------------------------------

         // 8  [a16, a17, ..., a23]|[a24, a25, ..., a31]  // second load of seq a reads next 16 characters

         // 9  [b16, b17, ..., b23]|[b24, b25, ..., b31]  // second load of seq b reads next 16 characters

         //            ...         |        ...

         // 15 [g16, g17, ..., g23]|[g24, g25, ..., g31]  // second load of seq g reads next 16 characters

         //

         // This quadratic byte matrix can be transposed efficiently with simd instructions.

         // If the target simd scalar type is bigger we can apply the same mechanism but have then 16 4x4 matrices

         // (32 bit) or 256 2x2 matrices (64 bit).


         constexpr int8_t max_size = simd_traits<simd_t>::max_length;

         std::array<max_simd_type, max_size> matrix{};

         decltype(cached_iter) iterators_before_update{cached_iter}; // Keep track of iterators before the update.

         // Iterate over each sequence.

         for (uint8_t sequence_pos = 0; sequence_pos < chunk_size; ++sequence_pos)

         {  // Iterate over each block depending on the packing of the target simd vector.

             for (uint8_t chunk_pos = 0; chunk_pos < chunks_per_load; ++chunk_pos)

             {

                 uint8_t pos = chunk_pos * chunk_size + sequence_pos; // matrix entry to fill

                 if (cached_sentinel[sequence_pos] - cached_iter[sequence_pos] >= max_size)

                 { // Not in final block, thus load directly from memory.

                     matrix[pos] = simd::load<max_simd_type>(std::addressof(*cached_iter[sequence_pos]));

                     std::advance(cached_iter[sequence_pos], max_size);

                 }

                 else  // Loads the final block byte wise in order to not load from uninitialised memory.

                 {

                     // Pre-fill with the marker `~0`; split_into_sub_matrices() later replaces it by the padding.
                     matrix[pos] = simd::fill<max_simd_type>(~0);

                     auto & sequence_it = cached_iter[sequence_pos];

                     for (int8_t idx = 0; sequence_it != cached_sentinel[sequence_pos]; ++sequence_it, ++idx)

                         matrix[pos][idx] = seqan3::to_rank(*sequence_it);

                 }

             }

         }


         // Handle the final chunk, which might end at an offset that is not a multiple of `chunk_size`.

         final_chunk = all_iterators_reached_sentinel();


         if (final_chunk)

             update_final_chunk_position(iterators_before_update);


         simd::transpose(matrix);

         split_into_sub_matrices(std::move(matrix));

     }


     // Generic variant: fills the single cached chunk vector by vector via convert_single_column().
     // Sets `at_end` if the previous load was already the final one.
     constexpr void underflow()

         requires (!fast_load)

     {

         at_end = final_chunk;

         if (at_end)  // reached end of stream.

             return;


         decltype(cached_iter) iterators_before_update{cached_iter}; // Keep track of iterators before the update.

         for (size_t i = 0; i < chunk_size; ++i)

             this_view->cached_simd_chunks[0][i] = convert_single_column();


         final_chunk = all_iterators_reached_sentinel();


         if (final_chunk)

             update_final_chunk_position(iterators_before_update);

     }


     // Current read position within every sequence slot.
     std::array<std::ranges::iterator_t<inner_range_type>, chunk_size> cached_iter{};

     // End of every sequence slot.
     std::array<std::ranges::sentinel_t<inner_range_type>, chunk_size> cached_sentinel{};

     // The view this iterator belongs to; owns the cached chunks and the padding values.
     view_to_simd * this_view{nullptr};

     // Number of simd vectors exposed for the chunk at `final_chunk_pos`.
     uint8_t final_chunk_size{chunk_size};

     // Index of the last valid cached chunk of the current load.
     uint8_t final_chunk_pos{total_chunks - 1};

     // Index of the cached chunk that operator* currently exposes.
     uint8_t current_chunk_pos{0};

     // True once the most recent load consumed the remaining symbols of all sequences.
     bool final_chunk{true};

     // True if the iterator compares equal to the sentinel.
     bool at_end{true};

 };


 // ============================================================================

 //  to_simd_fn (adaptor definition)

 // ============================================================================


 // Function object behind views::to_simd: turns a range of sequences into a view_to_simd over `simd_t`.
 //
 // It can be invoked
 //   * with a range (and optionally a padding value), which returns the view directly, or
 //   * without a range (optionally with a padding value), which returns an adaptor_from_functor object that stores
 //     the arguments.
 template <simd::simd_concept simd_t>
 struct to_simd_fn
 {
     // Type of the padding value: the scalar type of `simd_t`.
     using padding_t = typename simd_traits<simd_t>::scalar_type;

     // Returns an adaptor object that stores this functor together with `padding_value`.
     constexpr auto operator()(padding_t const padding_value) const noexcept
     {
         return detail::adaptor_from_functor{*this, padding_value};
     }

     // Returns an adaptor object that stores this functor only; the view then uses its default padding value.
     constexpr auto operator()() const noexcept
     {
         return detail::adaptor_from_functor{*this};
     }

     // Creates the view over `urange` using `padding_value` for lanes whose sequence has ended.
     //
     // Deliberately not `noexcept`: the constructor of view_to_simd throws std::invalid_argument if `urange`
     // holds more sequences than `simd_t` has lanes, and that exception must be able to reach the caller.
     template <std::ranges::range urng_t>
     constexpr auto operator()(urng_t && urange, padding_t const padding_value) const
     {
         static_assert(std::ranges::forward_range<urng_t>,
             "The underlying range in views::to_simd must model std::ranges::forward_range.");
         static_assert(std::ranges::viewable_range<urng_t>,
             "The underlying range in views::to_simd must model std::ranges::viewable_range.");
         static_assert(std::ranges::input_range<std::ranges::range_value_t<urng_t>>,
             "The value type of the underlying range must model std::ranges::input_range.");
         static_assert(semialphabet<std::ranges::range_value_t<std::ranges::range_value_t<urng_t>>>,
             "The value type of the inner ranges must model seqan3::semialphabet.");

         return view_to_simd<type_reduce_view<urng_t>, simd_t>{std::forward<urng_t>(urange), padding_value};
     }

     // Creates the view over `urange` using the view's default padding value.
     //
     // Deliberately not `noexcept`, for the same reason as the overload taking a padding value.
     template <std::ranges::range urng_t>
     constexpr auto operator()(urng_t && urange) const
     {
         static_assert(std::ranges::forward_range<urng_t>,
             "The underlying range in views::to_simd must model std::ranges::forward_range.");
         static_assert(std::ranges::viewable_range<urng_t>,
             "The underlying range in views::to_simd must model std::ranges::viewable_range.");
         static_assert(std::ranges::input_range<std::ranges::range_value_t<urng_t>>,
             "The value type of the underlying range must model std::ranges::input_range.");
         static_assert(semialphabet<std::ranges::range_value_t<std::ranges::range_value_t<urng_t>>>,
             "The value type of the inner ranges must model seqan3::semialphabet.");

         return view_to_simd<type_reduce_view<urng_t>, simd_t>{std::forward<urng_t>(urange)};
     }

     // Pipe syntax `urange | to_simd_fn{}`: forwards to the call operator without a padding value.
     template <std::ranges::range urng_t>
     constexpr friend auto operator|(urng_t && urange, to_simd_fn const & me)
     {
         return me(std::forward<urng_t>(urange));
     }
 };


 } // namespace seqan3::detail


 namespace seqan3::views

 {


 // Adaptor object for the simd type `simd_t`: an instance of detail::to_simd_fn<simd_t>.
 // Per that functor's overloads it can be called with a range of sequences (optionally followed by a padding
 // value), called without a range to obtain an adaptor object, or used on the right-hand side of `|`.
 template <simd::simd_concept simd_t>

 inline constexpr auto to_simd = detail::to_simd_fn<simd_t>{};


 } // namespace seqan3::views

std::addressof
T addressof(T... args)

std::advance
T advance(T... args)

algorithm.hpp
Provides algorithms to modify seqan3::simd::simd_type.

algorithm
Adaptations of algorithms from the Ranges TS.

std::array

std::begin
T begin(T... args)

type_traits.hpp
Provides various transformation traits used by the range module.

template_inspection.hpp
Provides type traits for working with templates.

std::end
T end(T... args)

std::fill
T fill(T... args)

seqan3::alphabet_size
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:858

seqan3::to_rank
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155

seqan3::operator|
auto operator|(validator1_type &&vali1, validator2_type &&vali2)
Enables the chaining of validators.
Definition: validators.hpp:1104

seqan3::pack_traits::size
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:150

seqan3::views::chunk
constexpr auto chunk
A chunk view.
Definition: chunk.hpp:29

seqan3::views::get
auto const get
A view calling std::get on each element in a range.
Definition: get.hpp:66

seqan3::views::zip
constexpr auto zip
A zip view.
Definition: zip.hpp:29

seqan3::views::move
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:70

seqan3::views::type_reduce
constexpr auto type_reduce
A view adaptor that behaves like std::views::all, but type erases certain ranges.
Definition: type_reduce.hpp:158

semialphabet
The basis for seqan3::alphabet, but requires only rank interface (not char).

std::invalid_argument

iterator
Provides C++20 additions to the <iterator> header.

std::input_iterator_tag

seqan3::views
The SeqAn namespace for views.
Definition: char_to.hpp:22

seqan3::views::to_simd
constexpr auto to_simd
A view that transforms a range of ranges into chunks of seqan3::simd vectors.
Definition: to_simd.hpp:790

std
SeqAn specific customisations in the standard namespace.

std::rel_ops::operator!=
T operator!=(T... args)

pack_algorithm.hpp
Provides algorithms for meta programming, parameter packs and seqan3::type_list.

ranges
Adaptations of concepts from the Ranges TS.

std::remove_cvref_t

std::span

type_reduce.hpp
Provides seqan3::views::type_reduce.

concept.hpp
Provides seqan3::simd::simd_concept.

simd.hpp
Provides seqan3::simd::simd_type.

simd_traits.hpp
Provides seqan3::simd::simd_traits.

zip.hpp
Provides seqan3::views::zip.