IMP logo
pdb.h
Go to the documentation of this file.
1 /**
2  * \file atom/pdb.h
3  * \brief Functions to read pdbs
4  *
5  * Copyright 2007-2012 IMP Inventors. All rights reserved.
6  *
7  */
8 #ifndef IMPATOM_PDB_H
9 #define IMPATOM_PDB_H
10 
11 #include "atom_config.h"
12 #include "Hierarchy.h"
13 #include "Atom.h"
14 #include "element.h"
15 #include "internal/pdb.h"
16 #include "atom_macros.h"
17 #include <IMP/file.h>
18 #include <IMP/Model.h>
19 #include <IMP/Particle.h>
20 #include <IMP/FailureHandler.h>
21 #include <IMP/OptimizerState.h>
22 #include <IMP/internal/utility.h>
23 #include <boost/format.hpp>
24 
25 IMPATOM_BEGIN_NAMESPACE
26 
27 
28 //! Select which atoms to read from a PDB file
29 /** Selector is a general purpose class used to select records from a PDB
30  file. Using descendants of this class one may implement arbitrary
31  selection functions with operator() and pass them to PDB reading functions
32  for object selection. Simple selectors can be used to build more complicated
33  ones. Inheritence means "AND" unless otherwise noted (that is, the
34  CAlphaPDBSelector takes all non-alternate C-alphas since it inherits from
35  NonAlternativePDBSelector).
36 
37  \see read_pdb
38 */
39 class IMPATOMEXPORT PDBSelector: public IMP::base::Object {
40  public:
41  PDBSelector(std::string name): Object(name){}
42  //! Return true if the line should be processed
43  virtual bool get_is_selected(const std::string& pdb_line) const=0;
44  virtual ~PDBSelector();
45 };
46 
47 IMP_OBJECTS(PDBSelector, PDBSelectors);
48 
49 //! Select all ATOM and HETATM records which are not alternatives
51  public:
53  return (internal::atom_alt_loc_indicator(pdb_line) == ' '
54  || internal::atom_alt_loc_indicator(pdb_line)
55  == 'A'),out << "");
56 };
57 
58 //! Select all non-alternative ATOM records
60 public:
62  return NonAlternativePDBSelector::get_is_selected(pdb_line)
63  && internal::is_ATOM_rec(pdb_line),out << "");
64 };
65 
66 
67 //! Select all CA ATOM records
69  public:
71  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
72  return false;
73  }
74  const std::string type = internal::atom_type(pdb_line);
75  return (type[1] == 'C' && type[2] == 'A'
76  && type[3] == ' '),out << "");
77 };
78 
79 //! Select all CB ATOM records
80 class CBetaPDBSelector: public NonAlternativePDBSelector {
81  public:
83  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)){
84  return false;
85  }
86  const std::string type = internal::atom_type(pdb_line);
87  return (type[1] == 'C' && type[2] == 'B'
88  && type[3] == ' '),out << "");
89 };
90 
91 //! Select all C (not CA or CB) ATOM records
92 class CPDBSelector: public NonAlternativePDBSelector {
93  public:
95  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
96  return false;
97  }
98  const std::string type = internal::atom_type(pdb_line);
99  return (type[1] == 'C' && type[2] == ' ' && type[3] == ' '),
100  out << ""
101  );
102 };
103 
104 //! Select all N ATOM records
105 class NPDBSelector: public NonAlternativePDBSelector {
106  public:
108  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
109  return false;
110  }
111  const std::string type = internal::atom_type(pdb_line);
112  return (type[1] == 'N' && type[2] == ' ' && type[3] == ' '),
113  out << ""
114  );
115 };
116 
117 //! Defines a selector that will pick every ATOM and HETATM record
118 class AllPDBSelector : public PDBSelector {
119 public:
121  return true || pdb_line.empty(),
122  out << "");
123 };
124 
125 //! Select all ATOM and HETATMrecords with the given chain ids
127  public:
128  bool get_is_selected(const std::string &pdb_line) const {
129  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
130  return false;
131  }
132  for(int i=0; i < (int)chains_.length(); i++) {
133  if(internal::atom_chain_id(pdb_line) == chains_[i])
134  return true;
135  }
136  return false;
137  }
138  IMP_OBJECT_INLINE(ChainPDBSelector,out << chains_,);
139  //! The chain id can be any character in chains
140  ChainPDBSelector(const std::string &chains,
141  std::string name="ChainPDBSelector%1%"):
142  NonAlternativePDBSelector(name), chains_(chains) {}
143  private:
144  std::string chains_;
145 };
146 
147 //! Select all non-water ATOM and HETATMrecords
149  public:
151  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
152  return false;
153  }
154  const std::string res_name
155  = internal::atom_residue_name(pdb_line);
156  return ((res_name[0]=='H' && res_name[1] =='O'
157  && res_name[2]=='H') ||
158  (res_name[0]=='D' && res_name[1] =='O'
159  && res_name[2]=='D')),
160  out << ""
161  );
162 };
163 
164 //! Select all hydrogen ATOM and HETATM records
165 class IMPATOMEXPORT HydrogenPDBSelector : public NonAlternativePDBSelector {
166  bool is_hydrogen(std::string pdb_line) const;
167  public:
169  return is_hydrogen(pdb_line);,
170  out << "");
171 };
172 
173 //! Select non water and non hydrogen atoms
174 class NonWaterNonHydrogenPDBSelector : public NonAlternativePDBSelector {
176  public:
177  bool get_is_selected(const std::string &pdb_line) const {
178  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
179  return false;
180  }
181  return (! ws_->get_is_selected(pdb_line)
182  && ! hs_->get_is_selected(pdb_line));
183  }
184  IMP_OBJECT_INLINE(NonWaterNonHydrogenPDBSelector,out << *ws_,);
185  NonWaterNonHydrogenPDBSelector(std::string name):
186  NonAlternativePDBSelector(name),
187  ws_(new WaterPDBSelector()),
188  hs_(new HydrogenPDBSelector()){}
189  NonWaterNonHydrogenPDBSelector():
190  NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
191  ws_(new WaterPDBSelector()),
192  hs_(new HydrogenPDBSelector()){}
193 };
195 //! Select all non-water non-alternative ATOM and HETATM records
198  public:
199  bool get_is_selected(const std::string &pdb_line) const {
200  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
201  return false;
202  }
203  return( ! ws_->get_is_selected(pdb_line));
204  }
206  NonWaterPDBSelector(std::string name): NonAlternativePDBSelector(name),
207  ws_(new WaterPDBSelector()){}
208  NonWaterPDBSelector(): NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
209  ws_(new WaterPDBSelector()){}
210 };
212 //! Select all P ATOM records
214  public:
217  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
218  return false;
219  }
220  const std::string type = internal::atom_type(pdb_line);
221  return (type[1] == 'P' && type[2] == ' '),
222  out << "");
223 };
224 
225 // these do not work in python as the wrapped selectors get cleaned up
226 //! Select atoms which are selected by both selectors
227 /** To use do something like
228  \code
229  read_pdb(name, m, AndPDBSelector(PPDBSelector(), WaterPDBSelector()));
230  \endcode
231  */
233  const IMP::OwnerPointer<PDBSelector> a_, b_;
234 public:
235  bool get_is_selected(const std::string &pdb_line) const {
236  return a_->get_is_selected(pdb_line)
237  && b_->get_is_selected(pdb_line);
238  }
239  IMP_OBJECT_INLINE(AndPDBSelector,out << *a_ << " and " << *b_,);
241  PDBSelector("AndPDBSelector%1%"), a_(a), b_(b){}
242 };
243 
244 //! Select atoms which are selected by either selector
245 /** To use do something like
246  \code
247  read_pdb(name, m, OrPDBSelector(PPDBSelector(), WaterPDBSelector()));
248  \endcode
249  */
250 class OrPDBSelector: public PDBSelector {
251  const IMP::OwnerPointer<PDBSelector> a_, b_;
252 public:
253  bool get_is_selected(const std::string &pdb_line) const {
254  return a_->get_is_selected(pdb_line)
255  || b_->get_is_selected(pdb_line);
256  }
257  IMP_OBJECT_INLINE(OrPDBSelector,out << *a_ << " or " << *b_,);
259  PDBSelector("OrPDBSelector%1%"), a_(a), b_(b){}
260 };
261 
262 //! Select atoms which not selected by a given selector
263 /** To use do something like
264  \code
265  read_pdb(name, m, NotPDBSelector(PPDBSelector()));
266  \endcode
267  */
268 class NotPDBSelector: public PDBSelector {
270 public:
271  bool get_is_selected(const std::string &pdb_line) const {
272  return !a_->get_is_selected(pdb_line);
273  }
274  IMP_OBJECT_INLINE(NotPDBSelector,out << "not" << *a_,);
275  NotPDBSelector( PDBSelector *a): PDBSelector("NotPDBSelector%1%"),
276  a_(a){}
277 };
278 
279 
280 /** @name PDB Reading
281  \anchor pdb_in
282  The read PDB methods produce a hierarchy that looks as follows:
283  - One Atom per ATOM or HETATM record in the PDB.
284  - All Atom particles have a parent which is a Residue.
285  - All Residue particles have a parent which is a Chain.
286 
287  Waters are currently dropped if they are ATOM records. This can be fixed.
288 
289  The read_pdb() functions should successfully parse all valid pdb files. It
290  can produce warnings on files which are not valid. It will attempt to read
291  such files, but all bets are off.
292 
293  When reading PDBs, PDBSelector objects can be used to choose to only process
294  certain record types. See the class documentation for more information.
295  When no PDB selector is supplied for reading, the
296  NonWaterPDBSelector is used.
297 
298  Set the IMP::LogLevel to VERBOSE to see details of parse errors.
299 */
300 //!@{
301 
302 /** Read a all the molecules in the first model of the
303  pdb file.
304 
305  \relatesalso Hierarchy
306  */
307 IMPATOMEXPORT Hierarchy read_pdb(base::TextInput input,
308  Model* model);
309 
310 /** Rewrite the coordinates of the passed hierarchy based
311  on the contents of the first model in the pdb file.
312 
313  The hierarchy must have been created by reading from a pdb
314  file and the atom numbers must correspond between the files.
315  These are not really checked.
316 
317  A ValueException is thrown if there are insufficient models
318  in the file.
319 
320  \relatesalso Hierarchy
321  */
322 IMPATOMEXPORT void read_pdb(base::TextInput input,
323  int model,
324  Hierarchy h);
325 
326 
327 /** \relatesalso Hierarchy
328  */
329 IMPATOMEXPORT Hierarchy
330 read_pdb(base::TextInput input,
331  Model* model,
332  PDBSelector* selector,
333  bool select_first_model = true
334 #ifndef IMP_DOXYGEN
335  , bool no_radii=false
336 #endif
337  );
338 
339 
340 
341 /** \relatesalso Hierarchy
342  */
343 IMPATOMEXPORT Hierarchies read_multimodel_pdb(base::TextInput input,
344  Model *model,
345  PDBSelector* selector);
346 /** \relatesalso Hierarchy
347  */
348 IMPATOMEXPORT Hierarchies read_multimodel_pdb(base::TextInput input,
349  Model *model);
350 /** @} */
351 
352 /** @name PDB Writing
353  \anchor pdb_out
354  The methods to write a PDBs expects a Hierarchy that looks as follows:
355  - all leaves are Atom particles
356  - all Atom particles have Residue particles as parents
357 
358  All Residue particles that have a Chain particle as an ancestor
359  are considered part of a protein, DNA or RNA, ones without are
360  considered heterogens.
361 
362  The functions produce files that are not valid PDB files,
363  eg only ATOM/HETATM lines are printed for all Atom particles
364  in the hierarchy. Complain if your favorite program can't read them and
365  we might fix it.
366 */
367 //!@{
368 
369 /** \relatesalso Hierarchy
370 */
371 IMPATOMEXPORT void write_pdb(Hierarchy mhd,
372  base::TextOutput out,
373  unsigned int model=0);
374 /** \relatesalso Hierarchy
375 */
376 IMPATOMEXPORT void write_pdb(const Hierarchies &mhd,
377  base::TextOutput out,
378  unsigned int model=0);
379 
380 /** \relatesalso Hierarchy
381 */
382 IMPATOMEXPORT void write_multimodel_pdb(
383  const Hierarchies& mhd, base::TextOutput out);
384 
385 /** @} */
386 
387 
388 #ifndef IMP_DOXYGEN
389 
390 /**
391  This function returns a string in PDB ATOM format
392 */
393 IMPATOMEXPORT std::string get_pdb_string(const algebra::Vector3D& v,
394  int index = -1,
395  AtomType at = AT_C,
396  ResidueType rt = atom::ALA,
397  char chain = ' ',
398  int res_index = 1,
399  char res_icode = ' ',
400  double occpancy = 1.00,
401  double tempFactor = 0.00,
402  Element e = C);
403 
404 /**
405  This function returns a connectivity string in PDB format
406  /note The CONECT records specify connectivity between atoms for which
407  coordinates are supplied. The connectivity is described using
408  the atom serial number as found in the entry.
409  /note http://www.bmsc.washington.edu/CrystaLinks/man/pdb/guide2.2_frame.html
410 */
411 IMPATOMEXPORT std::string get_pdb_conect_record_string(int,int);
412 #endif
413 
414 
415 
416 /** \class WritePDBOptimizerState
417  This writes a PDB file at the specified interval during optimization.
418  If the file name contains %1% then a new file is written each time
419  with the %1% replaced by the index. Otherwise a new model is written
420  each time to the same file.
421  \class WritePDBFailureHandler
422  Write a PDB when an error occurs.
423 */
424 IMP_MODEL_SAVE(WritePDB, (const atom::Hierarchies& mh, std::string file_name),
425  atom::Hierarchies mh_;,
426  mh_=mh;,
427  ,
428  {
429  base::TextOutput to(file_name, append);
430  IMP_LOG(TERSE, "Writing pdb file " << file_name << std::endl);
431  atom::write_pdb(mh_,to, append?call:0);
432  });
433 
434 
435 IMPATOM_END_NAMESPACE
436 
437 #endif /* IMPATOM_PDB_H */

Generated on Tue May 22 2012 23:33:15 for IMP by doxygen 1.8.1