ex_FastaMatchProtocol

ex_FastaMatchProtocol finds similar substrings between two amino acid sequences. FastaMatchProtocol implements FAST algorithm to detect similar subsequences. This example just prints the list of FAST matches found between any two sequences from the input set.

USAGE:

ex_FastaMatchProtocol input.fasta [n_threads]

EXAMPLES:

ex_FastaMatchProtocol small500_95identical.fasta 4

REFERENCE: Smith, Temple F., and Michael S. Waterman. “Identification of common molecular subsequences.” PNAS 85 (1988): 2444-8 doi:10.1073/pnas.85.8.2444

Keywords:

Categories:

  • core/protocols/PairwiseSequenceIdentityProtocol.hh

Input files:

Output files:

Program source:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include <core/index.hh>
#include <utils/exit.hh>
#include <core/data/io/fasta_io.hh>
#include <core/protocols/PairwiseSequenceIdentityProtocol.hh>
#include <core/protocols/FastaMatchProtocol.hh>

std::string program_info = R"(

ex_FastaMatchProtocol finds similar substrings between two amino acid sequences.

FastaMatchProtocol implements FAST algorithm to detect similar subsequences. This example just prints the list
of FAST matches found between any two sequences from the input set.

USAGE:
    ex_FastaMatchProtocol input.fasta [n_threads]

EXAMPLES:
    ex_FastaMatchProtocol small500_95identical.fasta 4

REFERENCE:
  Smith, Temple F., and Michael S. Waterman. "Identification of common molecular subsequences." 
  PNAS 85 (1988): 2444-8 doi:10.1073/pnas.85.8.2444

)";

/** @brief Uses FastaMatchProtocol protocol to find similar substrings between two amino acid sequences
 *
 * CATEGORIES: core/protocols/PairwiseSequenceIdentityProtocol.hh
 * KEYWORDS:   FASTA input; sequence alignment; statistics
 */
int main(const int argc, const char* argv[]) {

  if(argc < 2) utils::exit_OK_with_message(program_info); // --- complain about missing program parameter

  using namespace core::data::sequence;
  using namespace core::protocols;
  using namespace core::alignment;

  utils::Logger logs("ex_FastaMatchProtocol");
  core::index2 n_threads = (argc > 2) ? atoi(argv[2]) : 4;

  logs << utils::LogLevel::INFO << "number of threads used : " << n_threads << "\n";

  core::protocols::FastaMatchProtocol protocol;
  protocol.longest_gap(8).keep_alignments(true).n_threads(n_threads).printed_seqname_length(5);
  protocol.batch_size(10000);

  std::vector<Sequence_SP> input_sequences;
  core::data::io::read_fasta_file(argv[1], input_sequences);
  for(Sequence_SP si: input_sequences) protocol.add_input_sequence(si);

  auto start = std::chrono::high_resolution_clock::now(); // --- timer starts!
  protocol.run();
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
  logs << utils::LogLevel::INFO << (size_t ) protocol.n_jobs_completed()
       << " FASTA matches calculated within " << time_span.count() << " [s]\n";

  protocol.print_header(std::cout);
  protocol.print_diagonal_matches(std::cout);
}
../_images/file_icon.png