ex_FastaMatchProtocol

ex_FastaMatchProtocol prints diagonal hits

USAGE:
ex_FastaMatchProtocol input.fasta [n_threads [seqID_cutoff  ]
EXAMPLES:
ap_PairwiseSequenceIdentityProtocol small500_95identical.fasta 4

Keywords:

Categories:

  • core/protocols/PairwiseSequenceIdentityProtocol.hh

Input files:

Output files:

Program source:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include <core/index.hh>
#include <utils/exit.hh>
#include <core/data/io/fasta_io.hh>
#include <core/protocols/PairwiseSequenceIdentityProtocol.hh>
#include <core/protocols/FastaMatchProtocol.hh>

std::string program_info = R"(

ex_FastaMatchProtocol prints diagonal hits

USAGE:
    ex_FastaMatchProtocol input.fasta [n_threads [seqID_cutoff  ]
EXAMPLES:
    ap_PairwiseSequenceIdentityProtocol small500_95identical.fasta 4

)";

/** @brief Uses FastaMatchProtocol protocol to find similar substrings between two amino acid sequences
 *
 * FastaMatchProtocol implements FASTA algorithm to detect similar subsequences. This example just prints the list
 * of FASTA matches found between any two sequences from the input set
 *
 * USAGE:
 *     ex_FastaMatchProtocol input.fasta [n_threads]
 * EXAMPLES:
 *     ex_FastaMatchProtocol unique100.fasta 4

 * CATEGORIES: core/protocols/PairwiseSequenceIdentityProtocol.hh
 * KEYWORDS:   FASTA input; sequence alignment; p-value
 */
int main(const int argc, const char* argv[]) {

  if(argc < 2) utils::exit_OK_with_message(program_info); // --- complain about missing program parameter

  using namespace core::data::sequence;
  using namespace core::protocols;
  using namespace core::alignment;

  utils::Logger logs("ex_FastaMatchProtocol");
  core::index2 n_threads = (argc > 2) ? atoi(argv[2]) : 4;

  logs << utils::LogLevel::INFO << "number of threads used : " << n_threads << "\n";

  core::protocols::FastaMatchProtocol protocol;
  protocol.longest_gap(8).keep_alignments(true).n_threads(n_threads).printed_seqname_length(5);
  protocol.batch_size(10000);

  std::vector<Sequence_SP> input_sequences;
  core::data::io::read_fasta_file(argv[1], input_sequences);
  for(Sequence_SP si: input_sequences) protocol.add_input_sequence(si);

  auto start = std::chrono::high_resolution_clock::now(); // --- timer starts!
  protocol.run();
  auto end = std::chrono::high_resolution_clock::now();
  std::chrono::duration<double> time_span = std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
  logs << utils::LogLevel::INFO << (size_t ) protocol.n_jobs_completed()
       << " FASTA matches calculated within " << time_span.count() << " [s]\n";

  protocol.print_header(std::cout);
  protocol.print_diagonal_matches(std::cout);
}
../_images/file_icon.png