ap_AlignmentPValuesProtocolΒΆ

ap_AlignmentPValuesProtocol calculates each-vs-each pairwise semiglobal alignments between protein sequences read from a given input file. p-value for every alignment is estimated based on re-shuffled statistics (30 randomly shuffled alignments are calculated)

USAGE:

ap_AlignmentPValuesProtocol input.fasta

EXAMPLE:

ap_AlignmentPValuesProtocol small500_95identical.fasta

Keywords:

Categories:

  • core/protocols/AlignmentPValuesProtocol.hh

Input files:

Output files:

Program source:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#include <utils/exit.hh>
#include <core/alignment/aligner_factory.hh>
#include <core/data/basic/Array2D.hh>
#include <core/data/sequence/Sequence.hh>
#include <core/protocols/AlignmentPValuesProtocol.hh>
#include <core/data/io/fasta_io.hh>
#include <core/data/basic/SparseMap2D.hh>

std::string program_info = R"(

ap_AlignmentPValuesProtocol calculates each-vs-each pairwise semiglobal alignments
between protein sequences read from a given input file. p-value for every alignment 
is estimated based on re-shuffled statistics (30 randomly shuffled alignments are 
calculated)

USAGE:
    ap_AlignmentPValuesProtocol input.fasta

EXAMPLE:
    ap_AlignmentPValuesProtocol small500_95identical.fasta

)";

/** @brief Uses AlignmentPValuesProtocol protocol to calculate all pairwise p-values for a given set of sequences
 *
 * CATEGORIES: core/protocols/AlignmentPValuesProtocol.hh
 * KEYWORDS:   FASTA input; sequence alignment; statistics
 * GROUP:      Alignments
 */
int main(const int argc, const char* argv[]) {

  if(argc < 2) utils::exit_OK_with_message(program_info); // --- complain about missing program parameter

  using namespace core::data::sequence;
  using namespace core::protocols;
  using namespace core::alignment;

  core::protocols::AlignmentPValuesProtocol protocol;
  protocol.gap_open(-10).gap_extend(-1).substitution_matrix("BLOSUM62").keep_alignments(true).
    alignment_method(AlignmentType::SEMIGLOBAL_ALIGNMENT).keep_alignments(true).n_threads(4);
  protocol.n_shuffles(30).p_value_cutoff(0.01);

  std::vector<Sequence_SP> input_sequences;
  core::data::io::read_fasta_file(argv[1], input_sequences);
  for(Sequence_SP si: input_sequences) protocol.add_input_sequence(si);

  auto start = std::chrono::high_resolution_clock::now(); // --- timer starts!
  protocol.run();
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
  std::cerr << input_sequences.size() * (input_sequences.size() - 1) / 2.0
            << " global alignment sequence similarities calculated within " << time_span.count() << " [s]\n";

  protocol.print_p_values(std::cout);
}
../_images/file_icon.png