IMP logo

pdb.h

Go to the documentation of this file.
00001 /**
00002  *  \file atom/pdb.h
00003  *  \brief Functions to read pdbs
00004  *
00005  *  Copyright 2007-2011 IMP Inventors. All rights reserved.
00006  *
00007  */
00008 #ifndef IMPATOM_PDB_H
00009 #define IMPATOM_PDB_H
00010 
00011 #include "atom_config.h"
00012 #include "Hierarchy.h"
00013 #include "Atom.h"
00014 #include "element.h"
00015 #include "internal/pdb.h"
00016 #include "atom_macros.h"
00017 #include <IMP/file.h>
00018 #include <IMP/Model.h>
00019 #include <IMP/Particle.h>
00020 #include <IMP/FailureHandler.h>
00021 #include <IMP/OptimizerState.h>
00022 #include <IMP/internal/utility.h>
00023 #include <boost/format.hpp>
00024 
00025 IMPATOM_BEGIN_NAMESPACE
00026 
00027 
00028 //! Select which atoms to read from a PDB file
00029 /** Selector is a general purpose class used to select records from a PDB
00030     file. Using descendants of this class one may implement arbitrary
00031     selection functions with operator() and pass them to PDB reading functions
00032     for object selection. Simple selectors can be used to build more complicated
00033     ones. Inheritence means "AND" unless otherwise noted (that is, the
00034     CAlphaPDBSelector takes all non-alternate C-alphas since it inherits from
00035     NonAlternativePDBSelector).
00036 
00037     \see read_pdb
00038 */
00039 class IMPATOMEXPORT PDBSelector: public Object {
00040  public:
00041   //! Return true if the line should be processed
00042   virtual bool get_is_selected(const std::string& pdb_line) const=0;
00043   virtual ~PDBSelector();
00044 };
00045 
00046 IMP_OBJECTS(PDBSelector, PDBSelectors);
00047 
00048 //! Select all ATOM and HETATM records which are not alternatives
00049 class NonAlternativePDBSelector : public PDBSelector {
00050  public:
00051   IMP_PDB_SELECTOR(NonAlternativePDBSelector,
00052                    return (internal::atom_alt_loc_indicator(pdb_line) == ' '
00053                            || internal::atom_alt_loc_indicator(pdb_line)
00054                            == 'A'),out << "");
00055 };
00056 
00057 //! Select all non-alternative ATOM records
00058 class ATOMPDBSelector: public NonAlternativePDBSelector {
00059 public:
00060   IMP_PDB_SELECTOR(ATOMPDBSelector,
00061                    return NonAlternativePDBSelector::get_is_selected(pdb_line)
00062                    && internal::is_ATOM_rec(pdb_line),out << "");
00063 };
00064 
00065 
00066 //! Select all CA ATOM records
00067 class CAlphaPDBSelector : public NonAlternativePDBSelector {
00068  public:
00069   IMP_PDB_SELECTOR(CAlphaPDBSelector,
00070                    if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
00071                      return false;
00072                    }
00073                    const std::string type = internal::atom_type(pdb_line);
00074                    return (type[1] == 'C' && type[2] == 'A'
00075                            && type[3] == ' '),out << "");
00076 };
00077 
00078 //! Select all CB ATOM records
00079 class CBetaPDBSelector: public NonAlternativePDBSelector {
00080  public:
00081   IMP_PDB_SELECTOR(CBetaPDBSelector,
00082                    if (!NonAlternativePDBSelector::get_is_selected(pdb_line)){
00083                      return false;
00084                    }
00085                    const std::string type = internal::atom_type(pdb_line);
00086                    return (type[1] == 'C' && type[2] == 'B'
00087                            && type[3] == ' '),out << "");
00088 };
00089 
00090 //! Select all C (not CA or CB) ATOM records
00091 class CPDBSelector: public NonAlternativePDBSelector {
00092  public:
00093   IMP_PDB_SELECTOR(CPDBSelector,
00094                    if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
00095                      return false;
00096                    }
00097                    const std::string type = internal::atom_type(pdb_line);
00098                    return (type[1] == 'C' && type[2] == ' ' && type[3] == ' '),
00099                    out << ""
00100                    );
00101 };
00102 
00103 //! Select all N ATOM records
00104 class NPDBSelector: public NonAlternativePDBSelector {
00105  public:
00106   IMP_PDB_SELECTOR(NPDBSelector,
00107                    if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
00108                      return false;
00109                    }
00110                    const std::string type = internal::atom_type(pdb_line);
00111                    return (type[1] == 'N' && type[2] == ' ' && type[3] == ' '),
00112                    out << ""
00113                    );
00114 };
00115 
00116 //! Defines a selector that will pick every ATOM and HETATM record
00117 class AllPDBSelector : public PDBSelector {
00118 public:
00119   IMP_PDB_SELECTOR(AllPDBSelector, return true || pdb_line.empty(),
00120                    out << "");
00121 };
00122 
00123 //! Select all ATOM and HETATMrecords with the given chain ids
00124 class ChainPDBSelector : public NonAlternativePDBSelector {
00125  public:
00126   IMP_PDB_SELECTOR(ChainPDBSelector,
00127                    if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
00128                      return false;
00129                    }
00130                    for(int i=0; i < (int)chains_.length(); i++) {
00131                      if(internal::atom_chain_id(pdb_line) == chains_[i])
00132                        return true;
00133                    }
00134                    return false,out << chains_);
00135   //! The chain id can be any character in chains
00136   ChainPDBSelector(const std::string &chains): chains_(chains) {}
00137  private:
00138   std::string chains_;
00139 };
00140 
00141 //! Select all non-water ATOM and HETATMrecords
00142 class WaterPDBSelector : public NonAlternativePDBSelector {
00143  public:
00144   IMP_PDB_SELECTOR(WaterPDBSelector,
00145                    if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
00146                      return false;
00147                    }
00148                    const std::string res_name
00149                       = internal::atom_residue_name(pdb_line);
00150                    return ((res_name[0]=='H' && res_name[1] =='O'
00151                             && res_name[2]=='H') ||
00152                            (res_name[0]=='D' && res_name[1] =='O'
00153                             && res_name[2]=='D')),
00154                    out << ""
00155                    );
00156 };
00157 
00158 //! Select all hydrogen ATOM and HETATM records
00159 class HydrogenPDBSelector : public NonAlternativePDBSelector {
00160   bool is_hydrogen(std::string pdb_line) const {
00161     if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
00162       return false;
00163     }
00164     std::string elem = internal::atom_element(pdb_line);
00165     boost::trim(elem);
00166     // determine if the line is hydrogen atom as follows:
00167     // 1. if the record has element field (columns 76-77),
00168     // check that it is indeed H. Note that it may be missing
00169     // in some files.
00170     // some programms do not output element, so the ATOM
00171     // line can be shorter.
00172     if(elem.length() == 1 && elem[0]=='H') return true;
00173     // 2. support elements that starts with H: He, Ho, Hf, Hg
00174     if(elem.length() == 2 && elem[0]=='H' &&
00175        (elem[1]=='E' || elem[1]=='e' || elem[1]=='O'
00176         || elem[1]=='o' ||
00177         elem[1]=='F' || elem[1]=='f' || elem[1]=='G'
00178         || elem[1]=='g'))
00179       return false;
00180     // 3. if no hydrogen is found in the element record,
00181     // try atom type field.
00182     // some NMR structures have 'D' for labeled hydrogens
00183     std::string atom_name = internal::atom_type(pdb_line);
00184     return (// " HXX" or " DXX" or "1HXX" ...
00185             ((atom_name[0] == ' ' || isdigit(atom_name[0])) &&
00186              (atom_name[1] == 'H' || atom_name[1] == 'D')) ||
00187             // "HXXX" or "DXXX"
00188             (atom_name[0] == 'H' || atom_name[0] == 'D'));
00189   }
00190  public:
00191   IMP_PDB_SELECTOR(HydrogenPDBSelector,
00192                    return is_hydrogen(pdb_line);,
00193                    out << "");
00194 };
00195 
00196 //! Select non water and non hydrogen atoms
00197 class NonWaterNonHydrogenPDBSelector : public NonAlternativePDBSelector {
00198   IMP::internal::OwnerPointer<PDBSelector> ws_, hs_;
00199  public:
00200   NonWaterNonHydrogenPDBSelector(): ws_(new WaterPDBSelector()),
00201                                     hs_(new HydrogenPDBSelector()){}
00202   IMP_PDB_SELECTOR(NonWaterNonHydrogenPDBSelector,
00203                    if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
00204                      return false;
00205                    }
00206                    return (! ws_->get_is_selected(pdb_line)
00207                            && ! hs_->get_is_selected(pdb_line)),
00208                    out << "");
00209 };
00210 
00211 //! Select all non-water non-alternative ATOM and HETATM records
00212 class NonWaterPDBSelector : public NonAlternativePDBSelector {
00213   IMP::internal::OwnerPointer<PDBSelector> ws_;
00214  public:
00215   NonWaterPDBSelector(): ws_(new WaterPDBSelector()){}
00216   IMP_PDB_SELECTOR(NonWaterPDBSelector,
00217                    if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
00218                      return false;
00219                    }
00220                    return( ! ws_->get_is_selected(pdb_line)),
00221                    out << *ws_);
00222 };
00223 
00224 //! Select all P ATOM records
00225 class PPDBSelector : public NonAlternativePDBSelector {
00226  public:
00227   IMP_PDB_SELECTOR(PPDBSelector,
00228                    if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
00229                      return false;
00230                    }
00231                    const std::string type = internal::atom_type(pdb_line);
00232                    return (type[1] == 'P' && type[2] == ' '),
00233                    out << "");
00234 };
00235 
00236 // these do not work in python as the wrapped selectors get cleaned up
00237 //! Select atoms which are selected by both selectors
00238 /** To use do something like
00239     \code
00240     read_pdb(name, m, AndPDBSelector(PPDBSelector(), WaterPDBSelector()));
00241     \endcode
00242  */
00243 class AndPDBSelector: public PDBSelector {
00244   const IMP::internal::OwnerPointer<PDBSelector> a_, b_;
00245 public:
00246   IMP_PDB_SELECTOR(AndPDBSelector,
00247                    return a_->get_is_selected(pdb_line)
00248                    && b_->get_is_selected(pdb_line),
00249                    out << *a_ << " and " << *b_);
00250   AndPDBSelector( PDBSelector *a, PDBSelector *b): a_(a), b_(b){}
00251 };
00252 
00253 //! Select atoms which are selected by either selector
00254 /** To use do something like
00255     \code
00256     read_pdb(name, m, OrPDBSelector(PPDBSelector(), WaterPDBSelector()));
00257     \endcode
00258  */
00259 class OrPDBSelector: public PDBSelector {
00260   const IMP::internal::OwnerPointer<PDBSelector> a_, b_;
00261 public:
00262   IMP_PDB_SELECTOR(OrPDBSelector,
00263                    return a_->get_is_selected(pdb_line)
00264                    || b_->get_is_selected(pdb_line),
00265                    out << *a_ << " or " << *b_);
00266   OrPDBSelector( PDBSelector *a, PDBSelector *b): a_(a), b_(b){}
00267 };
00268 
00269 //! Select atoms which not selected by a given selector
00270 /** To use do something like
00271     \code
00272     read_pdb(name, m, NotPDBSelector(PPDBSelector()));
00273     \endcode
00274  */
00275 class NotPDBSelector: public PDBSelector {
00276   const IMP::internal::OwnerPointer<PDBSelector> a_;
00277 public:
00278   IMP_PDB_SELECTOR(NotPDBSelector,
00279                    return !a_->get_is_selected(pdb_line),
00280                    out << "not " << *a_);
00281   NotPDBSelector( PDBSelector *a): a_(a){}
00282 };
00283 
00284 
00285 /** @name PDB Reading
00286     \anchor pdb_in
00287    The read PDB methods produce a hierarchy that looks as follows:
00288     - One Atom per ATOM or HETATM record in the PDB.
00289     - All Atom particles have a parent which is a Residue.
00290     - All Residue particles have a parent which is a Chain.
00291 
00292     Waters are currently dropped if they are ATOM records. This can be fixed.
00293 
00294     The read_pdb() functions should successfully parse all valid pdb files. It
00295     can produce warnings on files which are not valid. It will attempt to read
00296     such files, but all bets are off.
00297 
00298     When reading PDBs, PDBSelector objects can be used to choose to only process
00299     certain record types. See the class documentation for more information.
00300     When no PDB selector is supplied for reading, the
00301     NonWaterPDBSelector is used.
00302 
00303     Set the IMP::LogLevel to IMP::VERBOSE to see details of parse errors.
00304 */
00305 //!@{
00306 
00307 /** Read a all the molecules in the first model of the
00308     pdb file.
00309 
00310     \relatesalso Hierarchy
00311  */
00312 IMPATOMEXPORT Hierarchy read_pdb(TextInput in,
00313                                  Model* model);
00314 
00315 /** \relatesalso Hierarchy
00316  */
00317 IMPATOMEXPORT Hierarchy
00318 read_pdb(TextInput in,
00319          Model* model,
00320          PDBSelector* selector,
00321          bool select_first_model = true
00322 #ifndef IMP_DOXYGEN
00323          , bool no_radii=false
00324 #endif
00325          );
00326 
00327 
00328 
00329 /** \relatesalso Hierarchy
00330  */
00331 IMPATOMEXPORT Hierarchies read_multimodel_pdb(TextInput in,
00332                                               Model *model,
00333                                               PDBSelector* selector);
00334 /** @} */
00335 
00336 /** @name PDB Writing
00337     \anchor pdb_out
00338     The methods to write a PDBs expects a Hierarchy that looks as follows:
00339     - all leaves are Atom particles
00340     - all Atom particles have Residue particles as parents
00341 
00342     All Residue particles that have a Chain particle as an ancestor
00343     are considered part of a protein, DNA or RNA, ones without are
00344     considered heterogens.
00345 
00346     The functions produce files that are not valid PDB files,
00347     eg only ATOM/HETATM lines are printed for all Atom particles
00348     in the hierarchy. Complain if your favorite program can't read them and
00349     we might fix it.
00350 */
00351 //!@{
00352 
00353 /** \relatesalso Hierarchy
00354 */
00355 IMPATOMEXPORT void write_pdb(Hierarchy mhd,
00356                              TextOutput out);
00357 /** \relatesalso Hierarchy
00358 */
00359 IMPATOMEXPORT void write_pdb(const Hierarchies &mhd,
00360                              TextOutput out);
00361 
00362 /** \relatesalso Hierarchy
00363 */
00364 IMPATOMEXPORT void write_multimodel_pdb(
00365                         const Hierarchies& mhd, TextOutput out);
00366 
00367 /** @} */
00368 
00369 
00370 #ifndef IMP_DOXYGEN
00371 
00372 /**
00373    This function returns a string in PDB ATOM format
00374 */
00375 IMPATOMEXPORT std::string get_pdb_string(const algebra::VectorD<3>& v,
00376                                      int index = -1,
00377                                      AtomType at = AT_C,
00378                                      ResidueType rt = atom::ALA,
00379                                      char chain = ' ',
00380                                      int res_index = 1,
00381                                      char res_icode = ' ',
00382                                      double occpancy = 1.00,
00383                                      double tempFactor = 0.00,
00384                                      Element e = C);
00385 
00386 /**
00387    This function returns a connectivity string in PDB format
00388   /note The CONECT records specify connectivity between atoms for which
00389       coordinates are supplied. The connectivity is described using
00390       the atom serial number as found in the entry.
00391   /note http://www.bmsc.washington.edu/CrystaLinks/man/pdb/guide2.2_frame.html
00392 */
00393 IMPATOMEXPORT std::string get_pdb_conect_record_string(int,int);
00394 #endif
00395 
00396 
00397 
00398 /** \class WritePDBOptimizerState
00399     This writes a PDB file at the specified interval during optimization.
00400     The file name should not contain
00401     %1% (if it does not, the same file will be overwritten each time).
00402 
00403     \class WritePDBFailureHandler
00404     Write a PDB when an error occurs.
00405 
00406     \requires{class WriteBinaryFailureHandler, NetCDF}
00407 */
00408 IMP_MODEL_SAVE(WritePDB, (const atom::Hierarchies& mh, std::string file_name),
00409                atom::Hierarchies mh_;,
00410                mh_=mh;,
00411                ,
00412                {
00413                  IMP_LOG(TERSE, "Writing pdb file " << file_name << std::endl);
00414                  atom::write_pdb(mh_,file_name);
00415                });
00416 
00417 
00418 IMPATOM_END_NAMESPACE
00419 
00420 #endif /* IMPATOM_PDB_H */

Generated on Thu Mar 24 2011 02:01:39 for IMP by doxygen 1.7.3