SeqAn3  3.1.0-rc.1
The Modern C++ library for sequence analysis.
alignment_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2021, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2021, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <seqan3/std/iterator>
16 #include <memory>
17 #include <optional>
18 #include <seqan3/std/ranges>
19 #include <type_traits>
20 
38 
39 namespace seqan3::detail
40 {
41 
75 template <typename config_t, typename ...algorithm_policies_t>
76 class alignment_algorithm :
77  public invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>...
78 {
79 private:
80 
82  using traits_t = alignment_configuration_traits<config_t>;
84  using alignment_column_t = decltype(std::declval<alignment_algorithm>().current_alignment_column());
86  using alignment_column_iterator_t = std::ranges::iterator_t<alignment_column_t>;
88  using alignment_result_t = typename traits_t::alignment_result_type;
89 
90  static_assert(!std::same_as<alignment_result_t, empty_type>, "Alignment result type was not configured.");
91 
93  using score_debug_matrix_t =
94  std::conditional_t<traits_t::is_debug,
95  two_dimensional_matrix<std::optional<typename traits_t::original_score_type>,
97  matrix_major_order::column>,
98  empty_type>;
100  using trace_debug_matrix_t =
101  std::conditional_t<traits_t::is_debug,
102  two_dimensional_matrix<std::optional<trace_directions>,
104  matrix_major_order::column>,
105  empty_type>;
106 
107 public:
111  constexpr alignment_algorithm() = default;
112  constexpr alignment_algorithm(alignment_algorithm const &) = default;
113  constexpr alignment_algorithm(alignment_algorithm &&) = default;
114  constexpr alignment_algorithm & operator=(alignment_algorithm const &) = default;
115  constexpr alignment_algorithm & operator=(alignment_algorithm &&) = default;
116  ~alignment_algorithm() = default;
117 
126  explicit constexpr alignment_algorithm(config_t const & cfg) :
127  invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>{cfg}...,
128  cfg_ptr{std::make_shared<config_t>(cfg)}
129  {
130  this->scoring_scheme = seqan3::get<align_cfg::scoring_scheme>(*cfg_ptr).scheme;
131  this->initialise_alignment_state(*cfg_ptr);
132  }
134 
180  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
182  requires (!traits_t::is_vectorised) && std::invocable<callback_t, alignment_result_t>
184  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
185  {
186  using std::get;
187 
188  for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
189  compute_single_pair(idx, get<0>(sequence_pair), get<1>(sequence_pair), callback);
190  }
191 
193  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
195  requires traits_t::is_vectorised && std::invocable<callback_t, alignment_result_t>
197  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
198  {
199  assert(cfg_ptr != nullptr);
200 
201  static_assert(simd_concept<typename traits_t::score_type>, "Expected simd score type.");
202  static_assert(simd_concept<typename traits_t::trace_type>, "Expected simd trace type.");
203 
204  // Extract the batch of sequences for the first and the second sequence.
205  auto sequence1_range = indexed_sequence_pairs | views::elements<0> | views::elements<0>;
206  auto sequence2_range = indexed_sequence_pairs | views::elements<0> | views::elements<1>;
207 
208  // Initialise the find_optimum policy in the simd case.
209  this->initialise_find_optimum_policy(sequence1_range,
210  sequence2_range,
211  this->scoring_scheme.padding_match_score());
212 
213  // Convert batch of sequences to sequence of simd vectors.
214  auto simd_sequences1 = convert_batch_of_sequences_to_simd_vector(sequence1_range);
215  auto simd_sequences2 = convert_batch_of_sequences_to_simd_vector(sequence2_range);
216 
217  max_size_in_collection = std::pair{simd_sequences1.size(), simd_sequences2.size()};
218  // Reset the alignment state's optimum between executions of the alignment algorithm.
219  this->alignment_state.reset_optimum();
220 
221  compute_matrix(simd_sequences1, simd_sequences2);
222 
223  make_alignment_result(indexed_sequence_pairs, callback);
224  }
226 
227 private:
241  template <typename sequence_range_t>
242  constexpr auto convert_batch_of_sequences_to_simd_vector(sequence_range_t & sequences)
243  {
244  assert(static_cast<size_t>(std::ranges::distance(sequences)) <= traits_t::alignments_per_vector);
245 
246  using simd_score_t = typename traits_t::score_type;
247 
248  std::vector<simd_score_t, aligned_allocator<simd_score_t, alignof(simd_score_t)>> simd_sequence{};
249 
250  for (auto && simd_vector_chunk : sequences | views::to_simd<simd_score_t>(this->scoring_scheme.padding_symbol))
251  for (auto && simd_vector : simd_vector_chunk)
252  simd_sequence.push_back(std::move(simd_vector));
253 
254  return simd_sequence;
255  }
256 
274  template <std::ranges::forward_range sequence1_t,
275  std::ranges::forward_range sequence2_t,
276  typename callback_t>
277  constexpr void compute_single_pair(size_t const idx,
278  sequence1_t && sequence1,
279  sequence2_t && sequence2,
280  callback_t & callback)
281  {
282  assert(cfg_ptr != nullptr);
283 
284  if constexpr (traits_t::is_debug)
285  initialise_debug_matrices(sequence1, sequence2);
286 
287  // Reset the alignment state's optimum between executions of the alignment algorithm.
288  this->alignment_state.reset_optimum();
289 
290  if constexpr (traits_t::is_banded)
291  {
292  using seqan3::get;
293  // Get the band and check if band configuration is valid.
294  auto const & band = get<align_cfg::band_fixed_size>(*cfg_ptr);
295  check_valid_band_parameter(sequence1, sequence2, band);
296  auto && [subsequence1, subsequence2] = this->slice_sequences(sequence1, sequence2, band);
297  // It would be great to use this interface here instead
298  compute_matrix(subsequence1, subsequence2, band);
299  make_alignment_result(idx, subsequence1, subsequence2, callback);
300  }
301  else
302  {
303  compute_matrix(sequence1, sequence2);
304  make_alignment_result(idx, sequence1, sequence2, callback);
305  }
306  }
307 
324  template <typename sequence1_t, typename sequence2_t>
325  constexpr void check_valid_band_parameter(sequence1_t && sequence1,
326  sequence2_t && sequence2,
327  align_cfg::band_fixed_size const & band)
328  {
329  static_assert(config_t::template exists<align_cfg::band_fixed_size>(),
330  "The band configuration is required for the banded alignment algorithm.");
331 
333  static_assert(std::is_signed_v<diff_type>, "Only signed types can be used to test the band parameters.");
334 
335  if (static_cast<diff_type>(band.lower_diagonal) > std::ranges::distance(sequence1))
336  {
337  throw invalid_alignment_configuration
338  {
339  "Invalid band error: The lower diagonal excludes the whole alignment matrix."
340  };
341  }
342 
343  if (static_cast<diff_type>(band.upper_diagonal) < -std::ranges::distance(sequence2))
344  {
345  throw invalid_alignment_configuration
346  {
347  "Invalid band error: The upper diagonal excludes the whole alignment matrix."
348  };
349  }
350  }
351 
364  template <typename sequence1_t, typename sequence2_t>
365  constexpr void initialise_debug_matrices(sequence1_t & sequence1, sequence2_t & sequence2)
366  {
367  size_t rows = std::ranges::distance(sequence2) + 1;
368  size_t cols = std::ranges::distance(sequence1) + 1;
369 
370  score_debug_matrix = score_debug_matrix_t{number_rows{rows}, number_cols{cols}};
371  trace_debug_matrix = trace_debug_matrix_t{number_rows{rows}, number_cols{cols}};
372  }
373 
381  template <typename sequence1_t, typename sequence2_t>
382  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2)
384  requires (!traits_t::is_banded)
386  {
387  // ----------------------------------------------------------------------------
388  // Initialisation phase: allocate memory and initialise first column.
389  // ----------------------------------------------------------------------------
390 
391  this->allocate_matrix(sequence1, sequence2);
392  initialise_first_alignment_column(sequence2);
393 
394  // ----------------------------------------------------------------------------
395  // Recursion phase: compute column-wise the alignment matrix.
396  // ----------------------------------------------------------------------------
397 
398  for (auto const & alphabet1 : sequence1)
399  {
400  compute_alignment_column<true>(this->scoring_scheme_profile_column(alphabet1), sequence2);
401  finalise_last_cell_in_column(true);
402  }
403 
404  // ----------------------------------------------------------------------------
405  // Wrap up phase: track score in last column and prepare the alignment result.
406  // ----------------------------------------------------------------------------
407 
408  finalise_alignment();
409  }
410 
412  template <typename sequence1_t, typename sequence2_t>
413  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2, align_cfg::band_fixed_size const & band)
415  requires traits_t::is_banded
417  {
418  // ----------------------------------------------------------------------------
419  // Initialisation phase: allocate memory and initialise first column.
420  // ----------------------------------------------------------------------------
421 
422  // Allocate and initialise first column.
423  this->allocate_matrix(sequence1, sequence2, band, this->alignment_state);
424  using row_index_t = std::ranges::range_difference_t<sequence2_t>;
425  row_index_t last_row_index = this->score_matrix.band_row_index;
426  initialise_first_alignment_column(std::views::take(sequence2, last_row_index));
427 
428  // ----------------------------------------------------------------------------
429  // 1st recursion phase: iterate as long as the band intersects with the first row.
430  // ----------------------------------------------------------------------------
431 
432  row_index_t sequence2_size = std::ranges::distance(sequence2);
433  for (auto const & seq1_value : std::views::take(sequence1, this->score_matrix.band_col_index))
434  {
435  compute_alignment_column<true>(seq1_value, std::views::take(sequence2, ++last_row_index));
436  // Only if band reached last row of matrix the last cell might be tracked.
437  finalise_last_cell_in_column(last_row_index >= sequence2_size);
438  }
439 
440  // ----------------------------------------------------------------------------
441  // 2nd recursion phase: iterate until the end of the matrix.
442  // ----------------------------------------------------------------------------
443 
444  size_t first_row_index = 0;
445  for (auto const & seq1_value : std::views::drop(sequence1, this->score_matrix.band_col_index))
446  {
447  // In the second phase the band moves in every column one base down on the second sequence.
448  compute_alignment_column<false>(seq1_value, sequence2 | views::slice(first_row_index++, ++last_row_index));
449  // Only if band reached last row of matrix the last cell might be tracked.
450  finalise_last_cell_in_column(last_row_index >= sequence2_size);
451  }
452 
453  // ----------------------------------------------------------------------------
454  // Wrap up phase: track score in last column and prepare the alignment result.
455  // ----------------------------------------------------------------------------
456 
457  finalise_alignment();
458  }
459 
472  template <typename sequence2_t>
473  auto initialise_first_alignment_column(sequence2_t && sequence2)
474  {
475  // Get the initial column.
476  alignment_column = this->current_alignment_column();
477  assert(!alignment_column.empty()); // Must contain at least one element.
478 
479  // Initialise first cell.
480  alignment_column_it = alignment_column.begin();
481  this->init_origin_cell(*alignment_column_it, this->alignment_state);
482 
483  // Initialise the remaining cells of this column.
484  for (auto it = std::ranges::begin(sequence2); it != std::ranges::end(sequence2); ++it)
485  this->init_column_cell(*++alignment_column_it, this->alignment_state);
486 
487  // Finalise the last cell of the initial column.
488  bool at_last_row = true;
489  if constexpr (traits_t::is_banded) // If the band reaches until the last row of the matrix.
490  at_last_row = static_cast<size_t>(this->score_matrix.band_row_index) == this->score_matrix.num_rows - 1;
491 
492  finalise_last_cell_in_column(at_last_row);
493  }
494 
510  template <bool initialise_first_cell, typename sequence1_value_t, typename sequence2_t>
511  void compute_alignment_column(sequence1_value_t const & seq1_value, sequence2_t && sequence2)
512  {
513  this->next_alignment_column(); // move to next column and set alignment column iterator accordingly.
514  alignment_column = this->current_alignment_column();
515  alignment_column_it = alignment_column.begin();
516 
517  auto seq2_it = std::ranges::begin(sequence2);
518 
519  if constexpr (initialise_first_cell) // Initialise first cell if it intersects with the first row of the matrix.
520  {
521  this->init_row_cell(*alignment_column_it, this->alignment_state);
522  }
523  else // Compute first cell of banded column if it does not intersect with the first row of the matrix.
524  {
525  this->compute_first_band_cell(*alignment_column_it,
526  this->alignment_state,
527  this->scoring_scheme.score(seq1_value, *seq2_it));
528  ++seq2_it;
529  }
530 
531  for (; seq2_it != std::ranges::end(sequence2); ++seq2_it)
532  this->compute_cell(*++alignment_column_it,
533  this->alignment_state,
534  this->scoring_scheme.score(seq1_value, *seq2_it));
535  }
536 
547  constexpr void finalise_last_cell_in_column(bool const at_last_row) noexcept
548  {
549  if (at_last_row)
550  this->check_score_of_last_row_cell(*alignment_column_it, this->alignment_state);
551 
552  if constexpr (traits_t::is_debug)
553  dump_alignment_column();
554  }
555 
557  constexpr void finalise_alignment() noexcept
558  {
559  // ----------------------------------------------------------------------------
560  // Check for the optimum in last cell/column.
561  // ----------------------------------------------------------------------------
562 
563  this->check_score_of_cells_in_last_column(alignment_column, this->alignment_state);
564  this->check_score_of_last_cell(*alignment_column_it, this->alignment_state);
565  }
566 
593  template <typename index_t, typename sequence1_t, typename sequence2_t, typename callback_t>
595  requires (!traits_t::is_vectorised)
597  constexpr void make_alignment_result([[maybe_unused]] index_t const idx,
598  [[maybe_unused]] sequence1_t & sequence1,
599  [[maybe_unused]] sequence2_t & sequence2,
600  callback_t & callback)
601  {
602  using result_value_t = typename alignment_result_value_type_accessor<alignment_result_t>::type;
603 
604  // ----------------------------------------------------------------------------
605  // Build the alignment result
606  // ----------------------------------------------------------------------------
607 
608  static_assert(seqan3::detail::alignment_configuration_traits<config_t>::has_output_configuration,
609  "The configuration must contain at least one align_cfg::output_* element.");
610 
611  result_value_t res{};
612 
613  if constexpr (traits_t::output_sequence1_id)
614  res.sequence1_id = idx;
615 
616  if constexpr (traits_t::output_sequence2_id)
617  res.sequence2_id = idx;
618 
619  // Choose what needs to be computed.
620  if constexpr (traits_t::compute_score)
621  res.score = this->alignment_state.optimum.score;
622 
623  if constexpr (traits_t::compute_end_positions)
624  {
625  using alignment_coordinate_t = detail::advanceable_alignment_coordinate<>;
626  res.end_positions = alignment_coordinate_t{column_index_type{this->alignment_state.optimum.column_index},
627  row_index_type{this->alignment_state.optimum.row_index}};
628  // At some point this needs to be refactored so that it is not necessary to adapt the coordinate.
629  if constexpr (traits_t::is_banded)
630  res.end_positions.second += res.end_positions.first - this->trace_matrix.band_col_index;
631  }
632 
633  if constexpr (traits_t::compute_begin_positions)
634  {
635  // Get a aligned sequence builder for banded or un-banded case.
636  aligned_sequence_builder builder{sequence1, sequence2};
637 
638  detail::matrix_coordinate const optimum_coordinate
639  {
640  detail::row_index_type{this->alignment_state.optimum.row_index},
641  detail::column_index_type{this->alignment_state.optimum.column_index}
642  };
643  auto trace_res = builder(this->trace_matrix.trace_path(optimum_coordinate));
644  res.begin_positions.first = trace_res.first_sequence_slice_positions.first;
645  res.begin_positions.second = trace_res.second_sequence_slice_positions.first;
646 
647  if constexpr (traits_t::compute_sequence_alignment)
648  res.alignment = std::move(trace_res.alignment);
649  }
650 
651  // Store the matrices in debug mode.
652  if constexpr (traits_t::is_debug)
653  {
654  res.score_debug_matrix = std::move(score_debug_matrix);
655  if constexpr (traits_t::compute_sequence_alignment) // compute alignment
656  res.trace_debug_matrix = std::move(trace_debug_matrix);
657  }
658 
659  callback(std::move(res));
660  }
661 
687  template <typename indexed_sequence_pair_range_t, typename callback_t>
689  requires traits_t::is_vectorised
691  constexpr auto make_alignment_result(indexed_sequence_pair_range_t && index_sequence_pairs,
692  callback_t & callback)
693  {
694  using result_value_t = typename alignment_result_value_type_accessor<alignment_result_t>::type;
695 
696  size_t simd_index = 0;
697  for (auto && [sequence_pairs, alignment_index] : index_sequence_pairs)
698  {
699  (void) sequence_pairs;
700  result_value_t res{};
701 
702  if constexpr (traits_t::output_sequence1_id)
703  res.sequence1_id = alignment_index;
704 
705  if constexpr (traits_t::output_sequence2_id)
706  res.sequence2_id = alignment_index;
707 
708  if constexpr (traits_t::compute_score)
709  res.score = this->alignment_state.optimum.score[simd_index]; // Just take this
710 
711  if constexpr (traits_t::compute_end_positions)
712  {
713  res.end_positions.first = this->alignment_state.optimum.column_index[simd_index];
714  res.end_positions.second = this->alignment_state.optimum.row_index[simd_index];
715  }
716 
717  callback(std::move(res));
718  ++simd_index;
719  }
720  }
721 
730  void dump_alignment_column()
731  {
732  using std::get;
733 
734  auto column = this->current_alignment_column();
735 
736  auto coord = get<1>(column.front()).coordinate;
737  if constexpr (traits_t::is_banded)
738  coord.second += coord.first - this->score_matrix.band_col_index;
739 
740  matrix_offset offset{row_index_type{static_cast<std::ptrdiff_t>(coord.second)},
741  column_index_type{static_cast<std::ptrdiff_t>(coord.first)}};
742 
743  std::ranges::copy(column | std::views::transform([] (auto const & tpl)
744  {
745  using std::get;
746  return get<0>(tpl).current;
747  }), score_debug_matrix.begin() + offset);
748 
749  // if traceback is enabled.
750  if constexpr (traits_t::compute_sequence_alignment)
751  {
752  auto trace_matrix_it = trace_debug_matrix.begin() + offset;
753  std::ranges::copy(column | std::views::transform([] (auto const & tpl)
754  {
755  using std::get;
756  return get<1>(tpl).current;
757  }), trace_debug_matrix.begin() + offset);
758  }
759  }
760 
762  std::shared_ptr<config_t> cfg_ptr{};
764  alignment_column_t alignment_column{};
766  alignment_column_iterator_t alignment_column_it{};
768  score_debug_matrix_t score_debug_matrix{};
770  trace_debug_matrix_t trace_debug_matrix{};
772  std::pair<size_t, size_t> max_size_in_collection{};
773 };
774 
775 } // namespace seqan3::detail
Provides seqan3::detail::align_config_band.
Provides seqan3::align_cfg::scoring_scheme.
Provides seqan3::detail::align_result_selector.
Provides seqan3::aligned_allocator.
Provides seqan3::detail::aligned_sequence_builder.
Includes customized exception types for the alignment module .
Provides concepts needed internally for the alignment algorithms.
Provides helper type traits for the configuration and execution of the alignment algorithm.
T begin(T... args)
Provides seqan3::detail::deferred_crtp_base.
Provides seqan3::views::elements.
Provides seqan3::detail::empty_type.
Provides various type traits for use on functions.
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
typename decltype(detail::split_after< i >(list_t{}))::first_type take
Return a seqan3::type_list of the first n types in the input type list.
Definition: traits.hpp:368
typename decltype(detail::split_after< i >(list_t{}))::second_type drop
Return a seqan3::type_list of the types in the input type list, except the first n.
Definition: traits.hpp:388
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: traits.hpp:471
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:183
Provides C++20 additions to the <iterator> header.
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:429
T push_back(T... args)
Adaptations of concepts from the Ranges TS.
Provides seqan3::simd::simd_type.
Provides seqan3::simd::simd_traits.
Provides seqan3::detail::to_simd view.
Provides the declaration of seqan3::detail::trace_directions.
Provides seqan3::simd::simd_concept.