ap_aligned_pdb

Reads an alignment between two proteins (PIR format) and the two respective protein structures (PDB format) and writes the aligned parts of the two structures.

The program concerns only the first two sequences found in the PIR file; they must be given in the same order as the input PDB files. Only the first chain will be used from either structure; if you need to superimpose chain ‘B’, use strc command to extract it prior using ap_aligned_pdb.

The program writes ‘query’ and ‘tmplt’ files which contain the respective structure fragments, already superimposed (the template on the query). One of the two structures may be missing (either the query or the template), dash ‘-‘ should be used instead of the respective file name, as in the examples below.

USAGE:
ap_aligned_pdb example.pir prot1.pdb prot2.pdb
ap_aligned_pdb example.pir - prot2.pdb
ap_aligned_pdb example.pir prot1.pdb -

Keywords:

Categories:

  • core/data/io/pir_io

Program source:

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include <iostream>

#include <core/data/io/pir_io.hh>
#include <core/data/io/Pdb.hh>
#include <core/data/io/alignment_io.hh>
#include <core/data/io/fasta_io.hh>

#include <utils/exit.hh>
#include <core/alignment/PairwiseSequenceAlignment.hh>
#include <core/data/structural/structure_selectors.hh>
#include <core/calc/structural/transformations/Crmsd.hh>
#include <utils/Logger.hh>

std::string program_info = R"(

Reads an alignment between two proteins (PIR format) and the two respective protein structures (PDB format)
and writes the aligned parts of the two structures.

The program concerns only the first two sequences found in the PIR file; they must be given in the same order
as the input PDB files. Only the first chain will be used from either structure; if you need to superimpose chain 'B',
use strc command to extract it prior using ap_aligned_pdb.

The program writes 'query' and 'tmplt' files which contain the respective structure fragments, already superimposed
(the template on the query). One of the two structures may be missing (either the query or the template), dash '-'
should be used instead of the respective file name, as in the examples below.


USAGE:
    ap_aligned_pdb example.pir prot1.pdb prot2.pdb
    ap_aligned_pdb example.pir - prot2.pdb
    ap_aligned_pdb example.pir prot1.pdb -

)";

using namespace core::data::structural;
utils::Logger logs("ap_aligned_pdb");

Structure_SP process_input_pdb(const std::string &pdb_fname, std::vector<Residue_SP> &residues, const ResidueSelector & which_part) {

  IsAA aa_only;
  // --- Read the first structure and repack its amino acid residues (the other cannot be aligned)
  core::data::io::Pdb reader(pdb_fname, core::data::io::is_not_alternative, true);
  Structure_SP strctr = reader.create_structure(0);

  logs << utils::LogLevel::INFO << "Selecting " << utils::to_string(which_part)<<" from "<<strctr->code()<<"\n";
  for(auto i_chain : *strctr) {
    for(auto i_resid : *i_chain)
      if (which_part(*i_resid) && aa_only(*i_resid)) {
        residues.push_back(i_resid);
      }
  }

  return strctr;
}

/** @brief  Reads an alignment between two proteins (PIR format) and the two structures and writes PDB for the aligned parts
 *
 * CATEGORIES: core/data/io/pir_io;
 * KEYWORDS:   PDB input; PIR input; PDB output
 * IMG: ap_aligned-1k6m-1bif.png
 * IMG_ALT: 1K6M and 1BIF structures aligned according to HOMSTRAD database
 */
int main(const int argc, const char *argv[]) {

  if (argc < 4) utils::exit_OK_with_message(program_info); // --- complain about missing program parameters

  using core::data::sequence::Sequence_SP; // --- Sequence_SP is just a std::shared_ptr to core::data::sequence::Sequence type
  using namespace core::alignment;
  using namespace core::data::structural;

  // --- Create a container where the sequences will be stored
  std::vector<Sequence_SP> sequences;

  // --- Read a file with PIR sequences and create an alignment object
  core::data::io::read_pir_file(argv[1], sequences);
  PairwiseSequenceAlignment alignment("query", sequences[0]->sequence, 0, "tmplt", sequences[1]->sequence, 0, 0.0);

  const auto select_query = core::data::structural::select_by_pir_header(*std::dynamic_pointer_cast<PirEntry>(sequences[0]));
  const auto select_tmplt = core::data::structural::select_by_pir_header(*std::dynamic_pointer_cast<PirEntry>(sequences[1]));

  // --- Read the first structure and repack its amino acid residues (the other cannot be aligned)
  Structure_SP query_structure, tmplt_structure;
  std::vector<Residue_SP> query_residues, tmplt_residues;
  if (strncmp(argv[2], "-", 1) != 0) query_structure = process_input_pdb(argv[2], query_residues,*select_query);
  if (strncmp(argv[3], "-", 1) != 0) tmplt_structure = process_input_pdb(argv[3], tmplt_residues,*select_tmplt);

  std::stringstream ss;
  core::data::io::write_edinburgh(alignment,ss,65535);
  logs << utils::LogLevel::INFO << "Input alignment\n" << ss.str() << "\n";

  // --- Retrieve aligned residues from the two structures according to the alignment object
  std::vector<Residue_SP> tmplt_residues_aligned, query_residues_aligned;   // --- container for the residues

  if (query_residues.size() == 0) alignment.alignment->get_aligned_template(tmplt_residues, tmplt_residues_aligned);
  if (tmplt_residues.size() == 0) alignment.alignment->get_aligned_query(query_residues, query_residues_aligned);

  // --- If both sets of coordinates are present - retrieve both and superimpose
  // --- Also, when both structures are given - calculate crmsd and roto-translation transformation
  if ((query_residues.size() > 0) && (tmplt_residues.size() > 0)) {
    alignment.alignment->get_aligned_query_template(query_residues, tmplt_residues, query_residues_aligned, tmplt_residues_aligned);
    std::vector<Vec3> query_xyz, tmplt_xyz;
    for (auto res:query_residues_aligned) query_xyz.push_back(*res->find_atom(" CA "));
    for (auto res:tmplt_residues_aligned) {
      tmplt_xyz.push_back(*res->find_atom(" CA "));
    }
    core::calc::structural::transformations::Crmsd<std::vector<Vec3>, std::vector<Vec3>> rms;
    std::cout << "crmsd between coordinates of " << query_xyz.size() << " CA atoms: " <<
              rms.crmsd(tmplt_xyz, query_xyz, query_xyz.size(), true) << "\n";
    for (auto res:tmplt_residues_aligned) for (auto atom:*res) rms.apply(*atom);
  }

  // --- Rotate the query coordinates and superimpose them on the template; print them in PDB format
  if (query_residues_aligned.size() > 0) {
    std::ofstream query_file("query.pdb");
    for (auto res:query_residues_aligned)
      for (auto atom:*res) query_file << atom->to_pdb_line() << "\n";
    query_file.close();
  }
  // --- Print template coordinates in PDB format
  if (tmplt_residues_aligned.size() > 0) {
    std::ofstream tmplt_file("tmplt.pdb");
    for (auto res:tmplt_residues_aligned)
      for (auto atom:*res) tmplt_file << atom->to_pdb_line() << "\n";
    tmplt_file.close();
  }
}
../_images/ap_aligned-1k6m-1bif.png