ex_SemiglobalAlignerΒΆ

Example that calculates semiglobal alignment i.e. the optimal global alignment where trailing gaps are not penalized. The program also shows how one can define its own scoring function to calculate an alignment

USAGE:

./ex_PairwiseAlignment

Keywords:

Categories:

  • core::alignment::SemiglobalAligner; core::alignment::PairwiseSequenceAlignment

Output files:

Program source:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#include <iostream>
#include <chrono>
#include <algorithm>

#include <core/data/io/fasta_io.hh>

#include <core/data/sequence/Sequence.hh>
#include <core/alignment/SemiglobalAligner.hh>
#include <core/alignment/PairwiseAlignment.hh>
#include <core/alignment/PairwiseSequenceAlignment.hh>
#include <core/alignment/scoring/SimilarityMatrix.hh>
#include <core/alignment/scoring/SimilarityMatrixScore.hh>
#include <utils/options/OptionParser.hh>

std::string program_info = R"(

Example that calculates semiglobal alignment i.e. the optimal global alignment where trailing
gaps are not penalized. The program also shows how  one can define its own scoring function
to calculate an alignment

USAGE:
./ex_PairwiseAlignment

)";

using namespace core::data::sequence;
using namespace core::alignment::scoring;

/// An example score function used by BioShell pairwise sequence alignment methods.
/** Such a scoring object must provide three components:
 *   - a scoring operator, whose arguments are positions in the scored sequences (query and template, respectively)
 *   - query_length() method, and
 *   - tmplt_length() method
 */
struct IdentityScore {
  IdentityScore(const Sequence & query,const Sequence & tmplt) : q(query.sequence), t(tmplt.sequence) {}

  /// Alignment score is 1 when the two compared letters are identical and 0 otherwise
  short operator()(const core::index2 i,const core::index2 j) const { return q[i]==t[j]; }
  /// Returns the length of a template sequence
  core::index2 tmplt_length() const { return t.length(); }
  /// Returns the length of a query sequence
  core::index2 query_length() const { return q.length(); }

  const std::string & q;
  const std::string & t;
};

/** @brief  Calculate a pairwise sequence alignment between two sequences with identity scoring method.
 *
 * The program calculates semiglobal alignment i.e. the optimal global alignment where trailing gaps are not penalized.
 * The program also shows how  one can define its own scoring function to calculate an alignment
 *
 * CATEGORIES: core::alignment::SemiglobalAligner; core::alignment::PairwiseSequenceAlignment
 * KEYWORDS:   sequence alignment
 */
int main(const int argc, const char* argv[]) {

  if ((argc > 1) && utils::options::call_for_help(argv[1]))
    utils::exit_OK_with_message(program_info);
  
  Sequence_SP query = std::make_shared<Sequence>("query","CATACGTCGACGGCT",1);
  Sequence_SP tmplt = std::make_shared<Sequence>("tmplt","ACGACGT",1);

  // --- create aligner object
  core::index2 max_len = std::max(query->length(),tmplt->length());
  core::alignment::SemiglobalAligner<short, IdentityScore> aligner(max_len);

  // --- find score of the alignment; just the score - this is faster than aligning and keeping backtracking info
  IdentityScore s(*query, *tmplt);
  short result1 = aligner.align_for_score(-10, -1, s);

  short result2 = aligner.align(-10, -1, s);
  core::alignment::PairwiseAlignment_SP ali = aligner.backtrace();
  std::cerr << ali->query_length() << " " << query->sequence << " " << ali->template_length() << " " << tmplt->sequence
      << "\n";
  core::alignment::PairwiseSequenceAlignment seq_ali(ali,query,tmplt);
  IdentityScore s2(*tmplt, *query);
  short result3 = aligner.align(-10, -1, s2);
  std::cout << "The three scores below should be identical:\n" << result1 << " " << result2 << " " << result3 << "\n"
      << seq_ali << "\n";
}
../_images/file_icon.png