ap_align_profiles

Calculate global alignment between two sequence profiles with a gap penalty that depends on observed gap probabilities

USAGE:
ap_align_profiles d4proc1-A1.profile d4proc1-A2.profile [gap_open gap_extend]

Categories:

  • core/alignment/NWAlignerAnyGap

Output files:

Program source:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#include <iostream>
#include <chrono>
#include <algorithm>

#include <core/data/io/fasta_io.hh>

#include <core/alignment/NWAlignerAnyGap.hh>
#include <core/alignment/PairwiseSequenceAlignment.hh>
#include <core/alignment/scoring/Picasso3.hh>
#include <core/alignment/scoring/FrequencyScaledGapPenalty.hh>
#include <core/data/io/alignment_io.hh>
#include <core/data/sequence/Sequence.hh>

#include <utils/exit.hh>

std::string program_info = R"(

Calculate global alignment between two sequence profiles with a gap penalty that depends on observed gap probabilities

USAGE:
    ap_align_profiles d4proc1-A1.profile d4proc1-A2.profile [gap_open gap_extend]

)";

/** @brief Calculate all pairwise sequence alignments between sequence profiles
 *
 * CATEGORIES: core/alignment/NWAlignerAnyGap
 * KEYWORDS:   FASTA input; Needleman-Wunsch; sequence alignment
 * GROUP:      Alignments
 */
int main(const int argc, const char* argv[]) {

  if(argc < 3) utils::exit_OK_with_message(program_info); // --- complain about missing program parameter

  using namespace core::data::io;
  using namespace core::data::sequence;
  using namespace core::alignment::scoring;

  float gap_open = -10;
  float gap_cont = -1;
  if(argc == 5) {
    gap_open = atof(argv[3]);
    gap_cont = atof(argv[4]);
  }

  utils::Logger logs("ap_align_profiles");

  // --- the query profile
  SequenceProfile_SP query = read_profile_table(argv[1]);
  std::vector<float> query_gap_open, query_gap_cont;
  query->get_probabilities(core::chemical::Monomer::GAP,query_gap_open);
  query->get_probabilities(core::chemical::Monomer::GPE,query_gap_cont);

  logs << utils::LogLevel::INFO << "Query sequence is: " << query->sequence<<"\n";
  query->write_table();

  // --- the template profile
  SequenceProfile_SP tmplt = read_profile_table(argv[2]);
  std::vector<float> tmplt_gap_open, tmplt_gap_cont;
  tmplt->get_probabilities(core::chemical::Monomer::GAP,tmplt_gap_open);
  tmplt->get_probabilities(core::chemical::Monomer::GPE,tmplt_gap_cont);

  logs << utils::LogLevel::INFO << "Template sequence is: " << tmplt->sequence<<"\n";
  
  // --- scoring system
  const Picasso3 scoring(query,tmplt);
  const FrequencyScaledGapPenalty gaps(gap_open,gap_cont,query_gap_open,query_gap_cont,tmplt_gap_open,tmplt_gap_cont);

  // --- create aligner object
  core::alignment::NWAlignerAnyGap<Picasso3,FrequencyScaledGapPenalty> aligner(std::max(query->length(),tmplt->length()));

  auto start = std::chrono::high_resolution_clock::now(); // --- timer starts!
  float score = aligner.align(scoring,gaps);
  auto ali = aligner.backtrace();
  core::alignment::PairwiseSequenceAlignment seq_ali(ali, query, tmplt);
  std::cout << seq_ali.get_aligned_query('*') << "\n";
  std::cout << seq_ali.get_aligned_template('*') << "\n";

  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
}
../_images/file_icon.png