ap_align_profiles

Read two files with sequence profiles (BioShell’s tabular format) and calculates global alignment between them. The gap penalty function depends on observed gap probabilities. Prints sequence alignment as an output. The default for values for base gap penalty is -10 and -1 for gap_open and gap_extend, respectively.

USAGE:

ap_align_profiles <file1.profile> <file2.profile> [gap_open gap_extend]

EXAMPLE:

ap_align_profiles d4proc1-A1.profile d4proc1-A2.profile -11 -2

Keywords:

Categories:

  • core/alignment/NWAlignerAnyGap

Input files:

Output files:

Program source:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#include <iostream>
#include <chrono>
#include <algorithm>

#include <core/data/io/fasta_io.hh>

#include <core/alignment/NWAlignerAnyGap.hh>
#include <core/alignment/PairwiseSequenceAlignment.hh>
#include <core/alignment/scoring/Picasso3.hh>
#include <core/alignment/scoring/FrequencyScaledGapPenalty.hh>
#include <core/data/io/alignment_io.hh>
#include <core/data/sequence/Sequence.hh>

#include <utils/exit.hh>

std::string program_info = R"(

Read two files with sequence profiles (BioShell’s tabular format) and calculates global alignment between them.
The gap penalty function depends on observed gap probabilities. Prints sequence alignment as an output.
The default for values for base gap penalty is -10 and -1 for gap_open and gap_extend, respectively.

USAGE:
ap_align_profiles <file1.profile> <file2.profile> [gap_open gap_extend]

EXAMPLE:
ap_align_profiles d4proc1-A1.profile d4proc1-A2.profile -11 -2

)";

/** @brief Calculate all pairwise sequence alignments between sequence profiles
 *
 * CATEGORIES: core/alignment/NWAlignerAnyGap
 * KEYWORDS:   FASTA input; Needleman-Wunsch; sequence alignment
 * GROUP:      Alignments
 */
int main(const int argc, const char* argv[]) {

  if(argc < 3) utils::exit_OK_with_message(program_info); // --- complain about missing program parameter

  using namespace core::data::io;
  using namespace core::data::sequence;
  using namespace core::alignment::scoring;

  float gap_open = -10;
  float gap_extend = -1;
  if(argc == 5) {
    gap_open = atof(argv[3]);
    gap_extend = atof(argv[4]);
  }

  utils::Logger logs("ap_align_profiles");

  // --- the query profile
  SequenceProfile_SP query = read_profile_table(argv[1]);
  std::vector<float> query_gap_open, query_gap_extend;
  query->get_probabilities(core::chemical::Monomer::GAP,query_gap_open);
  query->get_probabilities(core::chemical::Monomer::GPE,query_gap_extend);

  logs << utils::LogLevel::INFO << "Query sequence is: " << query->sequence<<"\n";

  // --- the template profile
  SequenceProfile_SP tmplt = read_profile_table(argv[2]);
  std::vector<float> tmplt_gap_open, tmplt_gap_extend;
  tmplt->get_probabilities(core::chemical::Monomer::GAP,tmplt_gap_open);
  tmplt->get_probabilities(core::chemical::Monomer::GPE,tmplt_gap_extend);

  logs << utils::LogLevel::INFO << "Template sequence is: " << tmplt->sequence<<"\n";
  
  // --- scoring system
  const Picasso3 scoring(query,tmplt);
  const FrequencyScaledGapPenalty gaps(gap_open,gap_extend,query_gap_open,query_gap_extend,tmplt_gap_open,tmplt_gap_extend);

  // --- create aligner object
  core::alignment::NWAlignerAnyGap<Picasso3,FrequencyScaledGapPenalty> aligner(std::max(query->length(),tmplt->length()));

  auto start = std::chrono::high_resolution_clock::now(); // --- timer starts!
  float score = aligner.align(scoring,gaps);
  auto ali = aligner.backtrace();
  core::alignment::PairwiseSequenceAlignment seq_ali(ali, query, tmplt);
  std::cout << seq_ali.get_aligned_query('*') << "\n";
  std::cout << seq_ali.get_aligned_template('*') << "\n";

  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
}
../_images/file_icon.png