ap_atom_correlations

ap_atom_correlations reads a multimodel PDB trajectory and calculates correlations between atomic coordinates

USAGE:
ap_atom_correlations 2kwi.pdb

where 2kwi.pdb is the input file. The output, printed on the screen, provides nine columns: i-atom j-atom covariance(i,j)

where the covariance between is computed

Categories:

  • core::data::io::Pdb::fill_structure; core::calc::statistics::OnlineMultivariateStatistics

Input files:

Output files:

Program source:

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#include <iostream>
#include <iomanip>

#include <core/data/io/Pdb.hh>
#include <core/calc/statistics/OnlineMultivariateStatistics.hh>
#include <utils/string_utils.hh>
#include <utils/exit.hh>

std::string program_info = R"(

ap_atom_correlations reads a multimodel PDB trajectory and calculates correlations between atomic coordinates

USAGE:
    ap_atom_correlations 2kwi.pdb

where 2kwi.pdb is the input file. The output, printed on the screen, provides nine columns:
i-atom j-atom covariance(i,j)

where the covariance between is computed 

)";

/** @brief Reads a multimodel PDB trajectory and calculates correlation between atomic coordinates
 *
 * CATEGORIES: core::data::io::Pdb::fill_structure; core::calc::statistics::OnlineMultivariateStatistics
 * KEYWORDS: PDB input; statistics
 */
int main(const int argc, const char *argv[]) {

  if (argc < 2) utils::exit_OK_with_message(program_info); // --- complain about missing program parameter

  core::data::io::Pdb reader(argv[1], core::data::io::is_ca); // --- Read PDB file, may be gzip-ped; take only the lines with C-alphas
  std::vector<core::data::basic::Vec3> atoms(reader.count_atoms(0));
  std::vector<double> xyz(atoms.size() * 3);
  core::calc::statistics::OnlineMultivariateStatistics stats(xyz.size());

  // --- Read all models from the deposit, store alpha carbons from each model as a sebarate vector of double values
  for (size_t i = 0; i < reader.count_models(); ++i) { // --- Iterate over all models in the input file
    reader.fill_structure(i, atoms);
    // --- utilize coordinates of the new pose
    for (size_t j = 0; j < atoms.size(); ++j) {
      xyz[j * 3] = atoms[j].x;
      xyz[j * 3 + 1] = atoms[j].y;
      xyz[j * 3 + 2] = atoms[j].z;
    }
    stats(xyz);
  }

  std::vector<std::string> labels;
  const auto structure = reader.create_structure(0);
  for(auto it = structure->first_const_residue(); it!=structure->last_const_residue();++it)
    labels.push_back(utils::string_format("%4d %3s CA", (**it).id(), (**it).residue_type().code3.c_str()));


  std::cout << "# i-resid coord  j-resid coord  i    j  correlation\n";
  std::string xyz_chars = "XYZ";
  std::cout << "#ipos j-pos correlation\n";
  for (size_t i = 0; i < xyz.size(); ++i) {
    for (size_t j = 0; j < xyz.size(); ++j) {
      std::cout << labels[int(i / 3)] << "-" << xyz_chars[i % 3] << " " << labels[int(j / 3)] << "-" << xyz_chars[j % 3];
      std::cout << " " << std::setw(4) << i << " " << std::setw(4) << j << " " << stats.covar(i, j) << "\n";
    }
  }
}
../_images/file_icon.png