SeqAn3  3.0.3
The Modern C++ library for sequence analysis.
to_simd.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2020, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2020, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <seqan3/std/algorithm>
16 #include <seqan3/std/iterator>
17 #include <seqan3/std/ranges>
18 
28 
29 namespace seqan3::detail
30 {
31 
57 template <std::ranges::view urng_t, simd::simd_concept simd_t>
58 class view_to_simd : public std::ranges::view_interface<view_to_simd<urng_t, simd_t>>
59 {
60 private:
61 
62  static_assert(std::ranges::forward_range<urng_t>,
63  "The underlying range must model forward_range.");
64  static_assert(std::ranges::input_range<std::ranges::range_value_t<urng_t>>,
65  "Expects the value type of the underlying range to be an input_range.");
66  static_assert(std::default_initializable<std::ranges::range_value_t<urng_t>>,
67  "Expects the inner range to be default constructible.");
68  static_assert(semialphabet<std::ranges::range_value_t<std::ranges::range_value_t<urng_t>>>,
69  "Expects semi-alphabet as value type of the inner range.");
70 
74  using inner_range_type = std::ranges::range_value_t<urng_t>;
76  using scalar_type = typename simd_traits<simd_t>::scalar_type;
78  using max_simd_type = simd_type_t<uint8_t, simd_traits<simd_t>::max_length>;
80 
85  static constexpr bool fast_load = std::ranges::contiguous_range<inner_range_type> &&
86  std::sized_sentinel_for<std::ranges::iterator_t<inner_range_type>,
87  std::ranges::sentinel_t<inner_range_type>> &&
88  sizeof(alphabet_rank_t<std::ranges::range_value_t<inner_range_type>>) == 1;
89 
91  static constexpr uint8_t chunk_size = simd_traits<simd_t>::length;
93  static constexpr uint8_t chunks_per_load = simd_traits<simd_t>::max_length / chunk_size;
95  static constexpr uint8_t total_chunks = fast_load ? (chunks_per_load * chunks_per_load) : 1;
97  static constexpr auto alphabet_size = alphabet_size<std::ranges::range_value_t<inner_range_type>>;
99 
100  // Forward declare class' iterator type. See definition below.
101  struct iterator_type;
102 
103 public:
104 
108  constexpr view_to_simd() = default;
109  constexpr view_to_simd(view_to_simd const &) = default;
110  constexpr view_to_simd(view_to_simd &&) = default;
111  constexpr view_to_simd & operator=(view_to_simd const &) = default;
112  constexpr view_to_simd & operator=(view_to_simd &&) = default;
113  ~view_to_simd() = default;
114 
119  constexpr view_to_simd(urng_t urng, scalar_type const padding_value = alphabet_size) :
120  urng{std::move(urng)},
121  padding_simd_vector{simd::fill<simd_t>(padding_value)},
122  padding_value{padding_value}
123  {
124  // Check if the size is less or equal the simd size.
125  if (std::ranges::distance(urng) > chunk_size)
126  throw std::invalid_argument{"The size of the underlying range must be less than or equal to the size of "
127  "the given simd type!"};
128  }
129 
131  template <typename other_urng_t>
133  requires (!std::same_as<std::remove_cvref_t<other_urng_t>, view_to_simd>) &&
134  (!std::same_as<other_urng_t, urng_t>) &&
135  std::ranges::viewable_range<other_urng_t>
137  constexpr view_to_simd(other_urng_t && urng, scalar_type const padding_value = alphabet_size) :
138  view_to_simd{views::type_reduce(std::forward<other_urng_t>(urng)), padding_value}
139  {}
141 
146  constexpr iterator_type begin() noexcept
147  {
148  return {*this};
149  }
150 
152  constexpr void begin() const noexcept = delete;
153 
155  constexpr std::default_sentinel_t end() noexcept
156  {
157  return std::default_sentinel;
158  }
159 
161  constexpr void end() const noexcept = delete;
163 
165  constexpr bool empty() const noexcept
167  requires std::ranges::forward_range<inner_range_type>
169  {
170  return std::ranges::all_of(urng, [] (auto & rng)
171  {
172  return std::ranges::empty(rng);
173  });
174  }
175 
182  constexpr size_t size() const noexcept
184  requires std::ranges::sized_range<inner_range_type>
186  {
187  auto it = std::ranges::max_element(urng, [] (auto & lhs, auto & rhs)
188  {
189  return std::ranges::size(lhs) < std::ranges::size(rhs);
190  });
191 
192  return (it != std::ranges::end(urng)) ? (std::ranges::size(*it) + chunk_size - 1) / chunk_size : 0;
193  }
194 
195 private:
196 
197  urng_t urng{};
198  std::array<chunk_type, total_chunks> cached_simd_chunks{};
199  simd_t padding_simd_vector{};
200  scalar_type padding_value{};
201 };
202 
210 template <std::ranges::view urng_t, simd::simd_concept simd_t>
211 class view_to_simd<urng_t, simd_t>::iterator_type
212 {
213 public:
218  using value_type = reference;
219  using pointer = void;
220  using difference_type = ptrdiff_t;
221  using iterator_category = std::input_iterator_tag;
222  using iterator_concept = iterator_category;
224 
228  constexpr iterator_type() = default;
229  constexpr iterator_type(iterator_type const &) = default;
230  constexpr iterator_type(iterator_type &&) = default;
231  constexpr iterator_type & operator=(iterator_type const &) = default;
232  constexpr iterator_type & operator=(iterator_type &&) = default;
233  ~iterator_type() = default;
234 
243  constexpr iterator_type(view_to_simd & this_view) : this_view{&this_view}, current_chunk_pos{0}
244  {
245  // Initialise the iterator of the sub ranges.
246  size_t seq_id = 0;
247  for (auto it = std::ranges::begin(this_view.urng); it != std::ranges::end(this_view.urng); ++it, ++seq_id)
248  {
249  cached_iter[seq_id] = std::ranges::begin(*it);
250  cached_sentinel[seq_id] = std::ranges::end(*it);
251  }
252 
253  // The batch is empty and by default the constructed iterator is pointing to the end.
254  if (seq_id == 0)
255  return;
256 
257  // The batch is not empty but might not be full either.
258  // If a slot is supposed to be empty, it will be initialised with the iterator of the first sequence set to the
259  // end emulating an empty sequence.
260  auto sentinel_it = std::ranges::next(cached_iter[0], cached_sentinel[0]);
261  for (; seq_id < chunk_size; ++seq_id)
262  {
263  cached_iter[seq_id] = sentinel_it;
264  cached_sentinel[seq_id] = cached_sentinel[0];
265  }
266 
267  // Check if this is the final chunk already.
268  final_chunk = all_iterators_reached_sentinel();
269 
270  // Fetch the next available input characters from the sequences and transform them into simd vectors.
271  underflow();
272  }
274 
279  constexpr reference operator*() const noexcept
280  {
281  assert(this_view != nullptr);
282  return std::span{this_view->cached_simd_chunks[current_chunk_pos].begin(),
283  (current_chunk_pos == final_chunk_pos) ? final_chunk_size : chunk_size};
284  }
286 
291  constexpr iterator_type & operator++(/*pre-increment*/)
292  {
293  if constexpr (fast_load)
294  { // Check if cached chunks have been already consumed and we need to fetch the next chunks.
295  if (current_chunk_pos == final_chunk_pos)
296  {
297  underflow();
298  current_chunk_pos = 0;
299  }
300  else
301  {
302  ++current_chunk_pos;
303  }
304  }
305  else // In case fast load is not available only one chunk is filled at a time.
306  {
307  underflow();
308  }
309 
310  return *this;
311  }
312 
314  constexpr value_type operator++(int /*post-increment*/)
315  {
316  value_type tmp = this->operator*();
317  ++(*this);
318  return tmp;
319  }
321 
326  constexpr bool operator==(std::default_sentinel_t const &) const noexcept
327  {
328  return at_end;
329  }
330 
332  friend constexpr bool operator==(std::default_sentinel_t const &, iterator_type const & rhs) noexcept
333  {
334  return rhs.at_end;
335  }
336 
338  constexpr bool operator!=(std::default_sentinel_t const &) const noexcept
339  {
340  return !at_end;
341  }
342 
344  friend constexpr bool operator!=(std::default_sentinel_t const &, iterator_type const & rhs) noexcept
345  {
346  return !rhs.at_end;
347  }
349 
350 private:
361  auto unpack(max_simd_type const & row) const
362  {
363  if constexpr (chunk_size == simd_traits<max_simd_type>::length / 2) // upcast into 2 vectors.
364  {
365  return std::array{simd::upcast<simd_t>(extract_half<0>(row)), // 1. half
366  simd::upcast<simd_t>(extract_half<1>(row))}; // 2. half
367  }
368  else if constexpr (chunk_size == simd_traits<max_simd_type>::length / 4) // upcast into 4 vectors.
369  {
370  return std::array{simd::upcast<simd_t>(extract_quarter<0>(row)), // 1. quarter
371  simd::upcast<simd_t>(extract_quarter<1>(row)), // 2. quarter
372  simd::upcast<simd_t>(extract_quarter<2>(row)), // 3. quarter
373  simd::upcast<simd_t>(extract_quarter<3>(row))}; // 4. quarter
374  }
375  else if constexpr (chunk_size == simd_traits<max_simd_type>::length / 8) // upcast into 8 vectors.
376  {
377  return std::array{simd::upcast<simd_t>(extract_eighth<0>(row)), // 1. eighth
378  simd::upcast<simd_t>(extract_eighth<1>(row)), // 2. eighth
379  simd::upcast<simd_t>(extract_eighth<2>(row)), // 3. eighth
380  simd::upcast<simd_t>(extract_eighth<3>(row)), // 4. eighth
381  simd::upcast<simd_t>(extract_eighth<4>(row)), // 5. eighth
382  simd::upcast<simd_t>(extract_eighth<5>(row)), // 6. eighth
383  simd::upcast<simd_t>(extract_eighth<6>(row)), // 7. eighth
384  simd::upcast<simd_t>(extract_eighth<7>(row))}; // 8. eighth
385  }
386  else
387  {
388  return std::array{simd::upcast<simd_t>(row)};
389  }
390  }
391 
402  constexpr void split_into_sub_matrices(std::array<max_simd_type, simd_traits<max_simd_type>::length> matrix) const
403  {
404  auto apply_padding = [this] (simd_t const vec)
405  {
406  return (vec == simd::fill<simd_t>(static_cast<uint8_t>(~0))) ? this_view->padding_simd_vector : vec;
407  };
408 
409  // Iterate over the rows of the matrix
410  for (uint8_t row = 0; row < static_cast<uint8_t>(matrix.size()); ++row)
411  {
412  // split a row into multiple chunks of size `chunk_size`
413  auto chunked_row = unpack(matrix[row]);
414 
415  if constexpr (chunked_row.size() == 1)
416  {
417  this_view->cached_simd_chunks[0][row] = apply_padding(std::move(chunked_row[0]));
418  }
419  else // Parse the tuple elements and store them in the cached simd chunks.
420  {
421  static_assert(chunked_row.size() == chunks_per_load, "Expected chunks_per_load many simd vectors.");
422 
423  for (uint8_t chunk = 0; chunk < chunks_per_load; ++chunk) // store chunks in respective cached entries.
424  {
425  size_t idx = chunk * chunks_per_load + row / chunk_size;
426  this_view->cached_simd_chunks[idx][row % chunk_size] = apply_padding(std::move(chunked_row[chunk]));
427  }
428  }
429  }
430  }
431 
435  constexpr bool all_iterators_reached_sentinel() const noexcept
436  {
437  using std::get;
438 
439  return std::ranges::all_of(views::zip(cached_iter, cached_sentinel), [] (auto && iterator_sentinel_pair)
440  {
441  return get<0>(iterator_sentinel_pair) == get<1>(iterator_sentinel_pair);
442  });
443  }
444 
455  constexpr simd_t convert_single_column()
456  noexcept
457  {
458  simd_t simd_column{};
459  for (size_t idx = 0u; idx < chunk_size; ++idx)
460  {
461  if (cached_iter[idx] == cached_sentinel[idx])
462  {
463  simd_column[idx] = this_view->padding_value;
464  }
465  else
466  {
467  simd_column[idx] = static_cast<scalar_type>(seqan3::to_rank(*cached_iter[idx]));
468  ++cached_iter[idx];
469  }
470  };
471  return simd_column;
472  }
473 
484  template <typename array_t>
485  constexpr void update_final_chunk_position(array_t const & iterators_before_update) noexcept
486  {
487  size_t max_distance = 0;
488  for (auto && [it, sent] : views::zip(iterators_before_update, cached_sentinel))
489  max_distance = std::max<size_t>(std::ranges::distance(it, sent), max_distance);
490 
491  assert(max_distance > 0);
492  assert(max_distance <= (total_chunks * chunk_size));
493 
494  --max_distance;
495  final_chunk_pos = max_distance / chunk_size;
496  // first we should be able to check the chunk position.
497  final_chunk_size = (max_distance % chunk_size) + 1;
498  }
499 
501  constexpr void underflow()
503  requires fast_load
505  {
506  at_end = final_chunk;
507  if (at_end) // reached end of stream.
508  return;
509  // For the efficient load we assume at most one byte sized alphabets.
510  // Hence we can load `simd_traits<simd_t>::max_length` length many elements at once.
511  // Depending on the packing of `simd_t` we can prefetch blocks and store them in the `cached_simd_chunks`.
512  // E.g. assume `simd_t` with length 8 on SSE4 with max length 16.
513  // To fill the 16x16 matrix we need four 8x8 matrices.
514  // Thus, for the 8 sequences we need to load two times 16 consecutive bytes to fill the matrix, i.e. two loads
515  // see figure below.
516  //
517  // 0 1 ... 7 | 8 9 ... 15
518  // 0 [a00, a01, ..., a07]|[a08, a09, ..., a15] // first load of seq a reads 16 characters
519  // 1 [b00, b01, ..., b07]|[b08, b09, ..., b15] // first load of seq b reads 16 characters
520  // ... | ...
521  // 7 [g00, g01, ..., g07]|[g08, g09, ..., g15] // first load of seq g reads 16 characters
522  // ----------------------------------------
523  // 8 [a16, a17, ..., a23]|[a24, a25, ..., a31] // second load of seq a reads next 16 characters
524  // 9 [b16, b17, ..., b23]|[b24, b25, ..., b31] // second load of seq b reads next 16 characters
525  // ... | ...
526  // 15 [g16, g17, ..., g23]|[g24, g25, ..., g31] // second load of seq g reads next 16 characters
527  //
528  // This quadratic byte matrix can be transposed efficiently with simd instructions.
529  // If the target simd scalar type is bigger we can apply the same mechanism but have then 16 4x4 matrices
530  // (32 bit) or 256 2x2 matrices (64 bit).
531 
532  constexpr int8_t max_size = simd_traits<simd_t>::max_length;
534  decltype(cached_iter) iterators_before_update{cached_iter}; // Keep track of iterators before the update.
535  // Iterate over each sequence.
536  for (uint8_t sequence_pos = 0; sequence_pos < chunk_size; ++sequence_pos)
537  { // Iterate over each block depending on the packing of the target simd vector.
538  for (uint8_t chunk_pos = 0; chunk_pos < chunks_per_load; ++chunk_pos)
539  {
540  uint8_t pos = chunk_pos * chunk_size + sequence_pos; // matrix entry to fill
541  if (cached_sentinel[sequence_pos] - cached_iter[sequence_pos] >= max_size)
542  { // Not in final block, thus load directly from memory.
543  matrix[pos] = simd::load<max_simd_type>(std::addressof(*cached_iter[sequence_pos]));
544  std::advance(cached_iter[sequence_pos], max_size);
545  }
546  else // Loads the final block byte wise in order to not load from uninitialised memory.
547  {
548  matrix[pos] = simd::fill<max_simd_type>(~0);
549  auto & sequence_it = cached_iter[sequence_pos];
550  for (int8_t idx = 0; sequence_it != cached_sentinel[sequence_pos]; ++sequence_it, ++idx)
551  matrix[pos][idx] = seqan3::to_rank(*sequence_it);
552  }
553  }
554  }
555 
556  // Handle final chunk which might not end at an offset which is not a multiple of `chunk_size`.
557  final_chunk = all_iterators_reached_sentinel();
558 
559  if (final_chunk)
560  update_final_chunk_position(iterators_before_update);
561 
562  simd::transpose(matrix);
563  split_into_sub_matrices(std::move(matrix));
564  }
565 
567  constexpr void underflow()
569  requires (!fast_load)
571  {
572  at_end = final_chunk;
573  if (at_end) // reached end of stream.
574  return;
575 
576  decltype(cached_iter) iterators_before_update{cached_iter}; // Keep track of iterators before the update.
577  for (size_t i = 0; i < chunk_size; ++i)
578  this_view->cached_simd_chunks[0][i] = convert_single_column();
579 
580  final_chunk = all_iterators_reached_sentinel();
581 
582  if (final_chunk)
583  update_final_chunk_position(iterators_before_update);
584  }
585 
587  std::array<std::ranges::iterator_t<inner_range_type>, chunk_size> cached_iter{};
589  std::array<std::ranges::sentinel_t<inner_range_type>, chunk_size> cached_sentinel{};
591  view_to_simd * this_view{nullptr};
593  uint8_t final_chunk_size{chunk_size};
595  uint8_t final_chunk_pos{total_chunks - 1};
597  uint8_t current_chunk_pos{0};
599  bool final_chunk{true};
601  bool at_end{true};
602 };
603 
604 // ============================================================================
605 // to_simd_fn (adaptor definition)
606 // ============================================================================
607 
616 template <simd::simd_concept simd_t>
617 struct to_simd_fn
618 {
620  using padding_t = typename simd_traits<simd_t>::scalar_type;
621 
625  constexpr auto operator()(padding_t const padding_value) const noexcept
626  {
627  return detail::adaptor_from_functor{*this, padding_value};
628  }
629 
631  constexpr auto operator()() const noexcept
632  {
633  return detail::adaptor_from_functor{*this};
634  }
635 
641  template <std::ranges::range urng_t>
642  constexpr auto operator()(urng_t && urange, padding_t const padding_value) const noexcept
643  {
644  static_assert(std::ranges::forward_range<urng_t>,
645  "The underlying range in views::to_simd must model std::ranges::forward_range.");
646  static_assert(std::ranges::viewable_range<urng_t>,
647  "The underlying range in views::to_simd must model std::ranges::viewable_range.");
648  static_assert(std::ranges::input_range<std::ranges::range_value_t<urng_t>>,
649  "The value type of the underlying range must model std::ranges::input_range.");
650  static_assert(semialphabet<std::ranges::range_value_t<std::ranges::range_value_t<urng_t>>>,
651  "The value type of the inner ranges must model seqan3::semialphabet.");
652 
653  return view_to_simd<type_reduce_view<urng_t>, simd_t>{std::forward<urng_t>(urange), padding_value};
654  }
655 
660  template <std::ranges::range urng_t>
661  constexpr auto operator()(urng_t && urange) const noexcept
662  {
663  static_assert(std::ranges::forward_range<urng_t>,
664  "The underlying range in views::to_simd must model std::ranges::forward_range.");
665  static_assert(std::ranges::viewable_range<urng_t>,
666  "The underlying range in views::to_simd must model std::ranges::viewable_range.");
667  static_assert(std::ranges::input_range<std::ranges::range_value_t<urng_t>>,
668  "The value type of the underlying range must model std::ranges::input_range.");
669  static_assert(semialphabet<std::ranges::range_value_t<std::ranges::range_value_t<urng_t>>>,
670  "The value type of the inner ranges must model seqan3::semialphabet.");
671 
672  return view_to_simd<type_reduce_view<urng_t>, simd_t>{std::forward<urng_t>(urange)};
673  }
674 
676  template <std::ranges::range urng_t>
677  constexpr friend auto operator|(urng_t && urange, to_simd_fn const & me)
678  {
679  return me(std::forward<urng_t>(urange));
680  }
681 };
682 
683 } // namespace seqan3::detail
684 
685 namespace seqan3::views
686 {
687 
789 template <simd::simd_concept simd_t>
790 inline constexpr auto to_simd = detail::to_simd_fn<simd_t>{};
791 
792 } // namespace seqan3::views
T addressof(T... args)
T advance(T... args)
Provides algorithms to modify seqan3::simd::simd_type.
Adaptations of algorithms from the Ranges TS.
T begin(T... args)
Provides various transformation traits used by the range module.
Provides type traits for working with templates.
T end(T... args)
T fill(T... args)
constexpr auto alphabet_size
A type trait that holds the size of a (semi-)alphabet.
Definition: concept.hpp:858
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: concept.hpp:155
auto operator|(validator1_type &&vali1, validator2_type &&vali2)
Enables the chaining of validators.
Definition: validators.hpp:1104
constexpr size_t size
The size of a type pack.
Definition: traits.hpp:150
constexpr auto chunk
A chunk view.
Definition: chunk.hpp:29
auto const get
A view calling std::get on each element in a range.
Definition: get.hpp:66
constexpr auto zip
A zip view.
Definition: zip.hpp:29
auto const move
A view that turns lvalue-references into rvalue-references.
Definition: move.hpp:70
constexpr auto type_reduce
A view adaptor that behaves like std::views::all, but type erases certain ranges.
Definition: type_reduce.hpp:158
The basis for seqan3::alphabet, but requires only rank interface (not char).
Provides C++20 additions to the <iterator> header.
The SeqAn namespace for views.
Definition: char_to.hpp:22
constexpr auto to_simd
A view that transforms a range of ranges into chunks of seqan3::simd vectors.
Definition: to_simd.hpp:790
SeqAn specific customisations in the standard namespace.
T operator!=(T... args)
Provides algorithms for meta programming, parameter packs and seqan3::type_list.
Adaptations of concepts from the Ranges TS.
Provides seqan3::views::type_reduce.
Provides seqan3::simd::simd_concept.
Provides seqan3::simd::simd_type.
Provides seqan3::simd::simd_traits.
Provides seqan3::views::zip.