ex_SelectChainBreaks

Reads a PDB file and prints list of chain breaks found in every chain

USAGE:
ex_SelectChainBreaks test_inputs/4mcb.pdb

Categories:

  • core::data::structural::SelectChainBreaks

Output files:

Program source:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#include <iostream>
#include <core/data/io/Pdb.hh>
#include <core/data/structural/SelectChainBreaks.hh>
#include <utils/exit.hh>

std::string program_info = R"(

Reads a PDB file and prints list of chain breaks found in every chain

USAGE:
    ex_SelectChainBreaks test_inputs/4mcb.pdb

)";

/** @brief Reads a PDB file and prints list of chain breaks found in every chain
 *
 * CATEGORIES: core::data::structural::SelectChainBreaks
 * KEYWORDS:   structure selectors; PDB input
 */
int main(const int argc, const char *argv[]) {

  using namespace core::data::structural;

  if (argc < 2) utils::exit_OK_with_message(program_info); // --- complain about missing program parameter

  // ---------- Read a PDB file and create a Structure object
  core::data::io::Pdb reader(argv[1],core::data::io::is_not_water);
  auto strctr = reader.create_structure(0);

  SelectChainBreaks sel;
  bool chain_is_OK = true;
  for (const auto &chain : *strctr) {
    if (std::distance(chain->begin(),chain->terminal_residue()) < 3) {
      std::cerr << "Chain " << chain->id() << " of " << strctr->code() << " is too short\n";
      continue;
    }
    if (chain->count_aa_residues() < chain->size() * 0.8) {
      std::cerr << "Chain " << chain->id() << " of " << strctr->code() << " is not a protein\n";
      continue;
    }
    std::cerr << "# Processing " << strctr->code() << " chain " << chain->id() << "\n";
    size_t first_aa = 0;
    while ((*chain)[first_aa]->residue_type().type != 'P') ++first_aa;
    const auto last_it = chain->terminal_residue() - 1;
    for (auto res_it = chain->begin() + first_aa + 1; res_it != last_it; ++res_it) {
      auto next = (**res_it).next();
      if(next== nullptr) {
        next = *(++std::find(chain->begin(),chain->end(),*res_it));
        std::cerr << "Residue following "<<(**res_it)<<" is not an amino acid!\n";
      }
      auto prev = (**res_it).previous();
      if(prev == nullptr) {
        prev = *(--std::find(chain->begin(),chain->end(),*res_it));
        std::cerr << "Residue preceding "<<(**res_it)<<" is not an amino acid!\n";
      }

      if (sel(*res_it)) {
        chain_is_OK = false;
        if (sel.last_chainbreak_type == RIGHT) {
          std::cout << utils::string_format("%s %c %4d%c %4d%c %6.2f\n", strctr->code().c_str(), chain->id(),
              (*res_it)->id(), (*res_it)->icode(),next->id(), next->icode(), sel.right_side_distance);
          ++res_it;
          if (res_it == last_it) break;
        }
        if (sel.last_chainbreak_type == LEFT) {
          std::cout << utils::string_format("%s %c %4d%c %4d %c %6.2f\n", strctr->code().c_str(), chain->id(),
              prev->id(), prev->icode(), (*res_it)->id(), (*res_it)->icode(), sel.left_side_distance);
        }
        if (sel.last_chainbreak_type == BOTH) {
          std::cout << utils::string_format("%s %c %4d%c %4d %c %6.2f %6.2f\n", strctr->code().c_str(), chain->id(),
              prev->id(), prev->icode(), (*res_it)->id(), (*res_it)->icode(), sel.left_side_distance);
          std::cout << utils::string_format("%s %c %4d%c %4d %c\n", strctr->code().c_str(), chain->id(),
              (*res_it)->id(), (*res_it)->icode(), next->id(), next->icode(), sel.right_side_distance);
          ++res_it;
          if (res_it == last_it) break;
        }
      }
    }
    if(chain_is_OK) std::cout << utils::string_format("%s %c OK\n", strctr->code().c_str(), chain->id());
  }
}
../_images/file_icon.png