diff --git a/modules/atom/include/Hierarchy.h b/modules/atom/include/Hierarchy.h index ef0fc918c3..2b7cf34841 100644 --- a/modules/atom/include/Hierarchy.h +++ b/modules/atom/include/Hierarchy.h @@ -451,7 +451,7 @@ IMPATOMEXPORT bool get_is_heterogen(Hierarchy h); //! Clone the Hierarchy /** This method copies the Bond, Bonded, Atom, - Residue, and Domain data and the particle name to the + Residue, Domain, and provenance data and the particle name to the new copies in addition to the Hierarchy relationships. \relates Hierarchy @@ -460,7 +460,7 @@ IMPATOMEXPORT Hierarchy create_clone(Hierarchy d); //! Clone the node in the Hierarchy /** This method copies the Atom, - Residue, Chain and Domain data and the particle name. + Residue, Chain, Domain, and provenance data and the particle name. \relates Hierarchy */ @@ -470,7 +470,8 @@ IMPATOMEXPORT Hierarchy create_clone_one(Hierarchy d); /** All bonds connecting to these atoms are destroyed as are hierarchy links in the Hierarchy and the particles are removed from the Model. If this particle has a parent, it is - removed from the parent. + removed from the parent. Any provenance information for this + Hierarchy is also removed. 
\relates Hierarchy */ IMPATOMEXPORT void destroy(Hierarchy d); diff --git a/modules/atom/src/Hierarchy.cpp b/modules/atom/src/Hierarchy.cpp index d2012832c9..41b8cecd0c 100644 --- a/modules/atom/src/Hierarchy.cpp +++ b/modules/atom/src/Hierarchy.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -437,6 +438,11 @@ Hierarchy clone_internal(Hierarchy d, if (Representation::get_is_setup(d.get_particle())) { nd = Representation::setup_particle(p, Representation(d.get_particle())); } + if (core::Provenanced::get_is_setup(d.get_particle())) { + core::Provenanced pd(d.get_particle()); + core::Provenance prov = core::create_clone(pd.get_provenance()); + core::Provenanced::setup_particle(p, prov); + } if (nd == Hierarchy()) nd = Hierarchy::setup_particle(p); using core::XYZ; @@ -531,6 +537,14 @@ void destroy(Hierarchy d) { destroy_bond(b.get_bond(b.get_number_of_bonds() - 1)); } } + if (core::Provenanced::get_is_setup(all[i])) { + core::Provenance prov = core::Provenanced(all[i]).get_provenance(); + while (prov) { + core::Provenance previous = prov.get_previous(); + prov.get_model()->remove_particle(prov.get_particle_index()); + prov = previous; + } + } Hierarchy hc(all[i]); while (hc.get_number_of_children() > 0) { hc.remove_child(hc.get_child(hc.get_number_of_children() - 1)); diff --git a/modules/atom/src/pdb.cpp b/modules/atom/src/pdb.cpp index 7ffa78fcf0..8ae10f84af 100644 --- a/modules/atom/src/pdb.cpp +++ b/modules/atom/src/pdb.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -244,11 +245,18 @@ Particle* residue_particle(Model* m, const std::string& pdb_line) { return p; } -Particle* chain_particle(Model* m, char chain_id) { +Particle* chain_particle(Model* m, char chain_id, std::string filename) { Particle* p = new Particle(m); Chain::setup_particle(p, chain_id); p->set_name(std::string("Chain " + std::string(1, chain_id))); Molecule::setup_particle(p); + + // Set provenance of this chain + 
core::StructureProvenance sp + = core::StructureProvenance::setup_particle(new Particle(m), + filename, std::string(1, chain_id)); + core::add_provenance(m, p->get_index(), sp); + return p; } } @@ -350,7 +358,7 @@ Hierarchies read_pdb(std::istream& in, std::string name, Model* model, if (cp == nullptr || chain != curr_chain) { curr_chain = chain; // create new chain particle - cp = chain_particle(model, chain); + cp = chain_particle(model, chain, name); chain_name_set = false; Hierarchy(root_p).add_child(Chain(cp)); rp = nullptr; // make sure we get a new residue diff --git a/modules/atom/test/test_pdb.py b/modules/atom/test/test_pdb.py index dae6551b4d..4d1a9eb0df 100644 --- a/modules/atom/test/test_pdb.py +++ b/modules/atom/test/test_pdb.py @@ -3,6 +3,7 @@ import IMP import IMP.test import IMP.atom +import IMP.core class Tests(IMP.test.TestCase): @@ -46,7 +47,7 @@ def test_read(self): #! read PDB mp = IMP.atom.read_pdb(self.open_input_file("input.pdb"), m, IMP.atom.NonWaterPDBSelector()) - self.assertEqual(len(m.get_particle_indexes()), 1132) + self.assertEqual(len(m.get_particle_indexes()), 1133) # IMP.atom.show_molecular_hierarchy(mp) IMP.atom.show(mp) IMP.atom.add_bonds(mp) @@ -58,7 +59,7 @@ def test_read(self): m2 = IMP.Model() mp = IMP.atom.read_pdb(self.open_input_file("input.pdb"), m2, IMP.atom.CAlphaPDBSelector()) - self.assertEqual(len(m2.get_particle_indexes()), 260) + self.assertEqual(len(m2.get_particle_indexes()), 261) ps = IMP.atom.get_by_type(mp, IMP.atom.ATOM_TYPE) self.assertEqual(len(ps), 129) IMP.atom.add_bonds(mp) @@ -218,5 +219,21 @@ def test_indexes(self): lvs = IMP.atom.get_leaves(hp) self.assertEqual(IMP.atom.Atom(lvs[2]).get_input_index(), 3) + def test_provenance(self): + """Test that StructureProvenance is set""" + m = IMP.Model() + fname = self.get_input_file_name("hydrogen.pdb") + mp = IMP.atom.read_pdb(fname, m) + chains = IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE) + self.assertEqual(len(chains), 5) + for c in chains: + 
self.assertTrue(IMP.core.Provenanced.get_is_setup(c)) + p = IMP.core.Provenanced(c).get_provenance() + self.assertTrue(IMP.core.StructureProvenance.get_is_setup(p)) + sp = IMP.core.StructureProvenance(p) + self.assertEqual(sp.get_filename(), fname) + self.assertEqual(sp.get_chain_id(), IMP.atom.Chain(c).get_id()) + + if __name__ == '__main__': IMP.test.main() diff --git a/modules/core/include/provenance.h b/modules/core/include/provenance.h new file mode 100644 index 0000000000..3de4bc092d --- /dev/null +++ b/modules/core/include/provenance.h @@ -0,0 +1,405 @@ +/** + * \file IMP/core/provenance.h + * \brief Classes to track how the model was created. + * + * Copyright 2007-2017 IMP Inventors. All rights reserved. + */ + +#ifndef IMPCORE_PROVENANCE_H +#define IMPCORE_PROVENANCE_H + +#include + +#include +#include +#include +#include +#include +#include + +IMPCORE_BEGIN_NAMESPACE + +//! Track how parts of the system were created. +/** Particles are linked with this decorator into a directed acyclic graph + that tracks all IMP transformations of the system all the way back to + raw inputs (such as PDB files). + + Typically, part of an IMP::Model (usually an atom::Hierarchy particle) + is decorated with Provenanced that points to the root of this graph. + */ +class IMPCOREEXPORT Provenance : public Decorator { + static void do_setup_particle(Model *m, ParticleIndex pi) { + // Use self-index to indicate no previous provenance is set yet + m->add_attribute(get_previous_key(), pi, pi); + } + + static ParticleIndexKey get_previous_key(); + +public: + static bool get_is_setup(Model *m, ParticleIndex pi) { + return m->get_has_attribute(get_previous_key(), pi); + } + + //! \return the previous provenance, or Provenance() if none exists. 
+ Provenance get_previous() const { + ParticleIndex pi = get_model()->get_attribute(get_previous_key(), + get_particle_index()); + // self-index indicates no previous provenance is set yet + if (pi == get_particle_index()) { + return Provenance(); + } else { + return Provenance(get_model(), pi); + } + } + + //! Set the previous provenance. + /** This can be used to show that a given part of the system was + generated through multiple steps in order, for example by first + being read from a PDB file, then sampled, filtered, and finally + clustered. + + \note it is considered an error to try to set this more than once. + */ + void set_previous(Provenance p) { + IMP_USAGE_CHECK(get_model()->get_attribute(get_previous_key(), + get_particle_index()) + == get_particle_index(), + "Previous provenance is already set"); + get_model()->set_attribute(get_previous_key(), + get_particle_index(), p.get_particle_index()); + } + + IMP_DECORATOR_METHODS(Provenance, Decorator); + IMP_DECORATOR_SETUP_0(Provenance); +}; + +//! Track creation of a system fragment from a PDB file (filename must be non-empty). +class IMPCOREEXPORT StructureProvenance : public Provenance { + static void do_setup_particle(Model *m, ParticleIndex pi, + std::string filename, + std::string chain_id) { + IMP_USAGE_CHECK(!filename.empty(), "The filename cannot be empty."); + Provenance::setup_particle(m, pi); // only after filename is validated + m->add_attribute(get_filename_key(), pi, filename); + m->add_attribute(get_chain_key(), pi, chain_id); + } + + static void do_setup_particle(Model *m, ParticleIndex pi, + StructureProvenance o) { + do_setup_particle(m, pi, o.get_filename(), o.get_chain_id()); + } + + static StringKey get_filename_key(); + static StringKey get_chain_key(); + +public: + static bool get_is_setup(Model *m, ParticleIndex pi) { + return m->get_has_attribute(get_filename_key(), pi) + && m->get_has_attribute(get_chain_key(), pi); + } + + //! 
Set the filename + void set_filename(std::string filename) const { + IMP_USAGE_CHECK(!filename.empty(), "The filename cannot be empty"); + return get_model()->set_attribute(get_filename_key(), get_particle_index(), + filename); + } + + //! \return the filename + std::string get_filename() const { + return get_model()->get_attribute(get_filename_key(), get_particle_index()); + } + + //! Set the chain ID + void set_chain_id(std::string chain_id) const { + return get_model()->set_attribute(get_chain_key(), get_particle_index(), + chain_id); + } + + //! \return the chain ID + std::string get_chain_id() const { + return get_model()->get_attribute(get_chain_key(), get_particle_index()); + } + + IMP_DECORATOR_METHODS(StructureProvenance, Provenance); + IMP_DECORATOR_SETUP_2(StructureProvenance, std::string, filename, + std::string, chain_id); + IMP_DECORATOR_SETUP_1(StructureProvenance, StructureProvenance, o); +}; + +//! Track creation of a system fragment from sampling. +/** Part of the system (usually the top of a Hierarchy) tagged with this + decorator is understood to be a single frame from an ensemble of + multiple frames generated with some sampling method (e.g. Monte Carlo). + Additionally, the number of iterations of the sampler used to generate + each frame can be stored, if known and applicable. + The rest of the frames are generally stored in a file (e.g. an RMF file). 
+ */ +class IMPCOREEXPORT SampleProvenance : public Provenance { + static void do_setup_particle(Model *m, ParticleIndex pi, + std::string method, int frames, + int iterations) { + validate_method(method); + Provenance::setup_particle(m, pi); + m->add_attribute(get_method_key(), pi, method); + m->add_attribute(get_frames_key(), pi, frames); + m->add_attribute(get_iterations_key(), pi, iterations); + } + + static void do_setup_particle(Model *m, ParticleIndex pi, + SampleProvenance o) { + do_setup_particle(m, pi, o.get_method(), o.get_number_of_frames(), + o.get_number_of_iterations()); + } + + static StringKey get_method_key(); + static IntKey get_frames_key(); + static IntKey get_iterations_key(); + + static std::set& get_allowed_methods(); + + static void validate_method(std::string method) { + IMP_USAGE_CHECK(get_allowed_methods().find(method) + != get_allowed_methods().end(), + "Invalid sampling method"); + } + +public: + static bool get_is_setup(Model *m, ParticleIndex pi) { + return m->get_has_attribute(get_method_key(), pi) + && m->get_has_attribute(get_iterations_key(), pi) + && m->get_has_attribute(get_frames_key(), pi); + } + + //! Set the sampling method + void set_method(std::string method) const { + validate_method(method); + return get_model()->set_attribute(get_method_key(), get_particle_index(), + method); + } + + //! \return the sampling method + std::string get_method() const { + return get_model()->get_attribute(get_method_key(), get_particle_index()); + } + + //! Set the number of frames + void set_number_of_frames(int frames) const { + return get_model()->set_attribute(get_frames_key(), get_particle_index(), + frames); + } + + //! \return the number of frames + int get_number_of_frames() const { + return get_model()->get_attribute(get_frames_key(), get_particle_index()); + } + + //! 
Set the number of iterations + void set_number_of_iterations(int iterations) const { + return get_model()->set_attribute(get_iterations_key(), + get_particle_index(), iterations); + } + + //! \return the number of iterations + int get_number_of_iterations() const { + return get_model()->get_attribute(get_iterations_key(), + get_particle_index()); + } + + IMP_DECORATOR_METHODS(SampleProvenance, Provenance); + IMP_DECORATOR_SETUP_3(SampleProvenance, std::string, method, int, frames, + int, iterations); + IMP_DECORATOR_SETUP_1(SampleProvenance, SampleProvenance, o); +}; + +//! Track creation of a system fragment by combination. +/** Part of the system (usually the top of a Hierarchy) tagged with this + decorator is understood to be a single frame within an ensemble that + was created by combining a number of independent runs. One of those runs + should be the 'previous' provenance. The runs should be + essentially identical, differing at most only in the number of frames. + The total size of the resulting ensemble is stored here. + */ +class IMPCOREEXPORT CombineProvenance : public Provenance { + static void do_setup_particle(Model *m, ParticleIndex pi, int runs, + int frames) { + Provenance::setup_particle(m, pi); + m->add_attribute(get_runs_key(), pi, runs); + m->add_attribute(get_frames_key(), pi, frames); + } + + static void do_setup_particle(Model *m, ParticleIndex pi, + CombineProvenance o) { + do_setup_particle(m, pi, o.get_number_of_runs(), o.get_number_of_frames()); + } + + static IntKey get_runs_key(); + static IntKey get_frames_key(); + +public: + static bool get_is_setup(Model *m, ParticleIndex pi) { + return m->get_has_attribute(get_frames_key(), pi) + && m->get_has_attribute(get_runs_key(), pi); + } + + //! Set the total number of frames + void set_number_of_frames(int frames) const { + return get_model()->set_attribute(get_frames_key(), get_particle_index(), + frames); + } + + //! 
\return the total number of frames + int get_number_of_frames() const { + return get_model()->get_attribute(get_frames_key(), get_particle_index()); + } + + //! Set the number of runs + void set_number_of_runs(int runs) const { + return get_model()->set_attribute(get_runs_key(), get_particle_index(), + runs); + } + + //! \return the number of runs + int get_number_of_runs() const { + return get_model()->get_attribute(get_runs_key(), get_particle_index()); + } + + IMP_DECORATOR_METHODS(CombineProvenance, Provenance); + IMP_DECORATOR_SETUP_2(CombineProvenance, int, runs, int, frames); + IMP_DECORATOR_SETUP_1(CombineProvenance, CombineProvenance, o); +}; + +//! Track creation of a system fragment by filtering. +/** Part of the system (usually the top of a Hierarchy) tagged with this + decorator is understood to be a single frame within an ensemble that + resulted from filtering a larger ensemble (the 'previous' + provenance) by discarding models with scores above the threshold. + */ +class IMPCOREEXPORT FilterProvenance : public Provenance { + static void do_setup_particle(Model *m, ParticleIndex pi, double threshold, + int frames) { + Provenance::setup_particle(m, pi); + m->add_attribute(get_threshold_key(), pi, threshold); + m->add_attribute(get_frames_key(), pi, frames); + } + static void do_setup_particle(Model *m, ParticleIndex pi, + FilterProvenance o) { + do_setup_particle(m, pi, o.get_threshold(), o.get_number_of_frames()); + } + + static FloatKey get_threshold_key(); + static IntKey get_frames_key(); + +public: + static bool get_is_setup(Model *m, ParticleIndex pi) { + return m->get_has_attribute(get_threshold_key(), pi) + && m->get_has_attribute(get_frames_key(), pi); + } + + //! Set the number of frames + void set_number_of_frames(int frames) const { + return get_model()->set_attribute(get_frames_key(), get_particle_index(), + frames); + } + + //! 
\return the number of frames + int get_number_of_frames() const { + return get_model()->get_attribute(get_frames_key(), get_particle_index()); + } + + //! Set the score threshold + void set_threshold(double threshold) const { + return get_model()->set_attribute(get_threshold_key(), get_particle_index(), + threshold); + } + + //! \return the threshold + double get_threshold() const { + return get_model()->get_attribute(get_threshold_key(), + get_particle_index()); + } + + IMP_DECORATOR_METHODS(FilterProvenance, Provenance); + IMP_DECORATOR_SETUP_2(FilterProvenance, double, threshold, int, frames); + IMP_DECORATOR_SETUP_1(FilterProvenance, FilterProvenance, o); +}; + +//! Track creation of a system fragment from clustering. +/** Part of the system (usually the top of a Hierarchy) tagged with this + decorator is understood to be a single frame inside a cluster of + specified size. The rest of the cluster members are generally stored + in a file (e.g. an RMF file). + */ +class IMPCOREEXPORT ClusterProvenance : public Provenance { + static void do_setup_particle(Model *m, ParticleIndex pi, int members) { + Provenance::setup_particle(m, pi); + m->add_attribute(get_members_key(), pi, members); + } + + static void do_setup_particle(Model *m, ParticleIndex pi, + ClusterProvenance o) { + do_setup_particle(m, pi, o.get_number_of_members()); + } + + static IntKey get_members_key(); + +public: + static bool get_is_setup(Model *m, ParticleIndex pi) { + return m->get_has_attribute(get_members_key(), pi); + } + + //! Set the number of cluster members + void set_number_of_members(int members) const { + return get_model()->set_attribute(get_members_key(), get_particle_index(), + members); + } + + //! 
\return the number of cluster members + int get_number_of_members() const { + return get_model()->get_attribute(get_members_key(), get_particle_index()); + } + + IMP_DECORATOR_METHODS(ClusterProvenance, Provenance); + IMP_DECORATOR_SETUP_1(ClusterProvenance, int, members); + IMP_DECORATOR_SETUP_1(ClusterProvenance, ClusterProvenance, o); +}; + +//! Tag part of the system to track how it was created. +class IMPCOREEXPORT Provenanced : public Decorator { + static void do_setup_particle(Model *m, ParticleIndex pi, + Provenance p) { + m->add_attribute(get_provenance_key(), pi, p.get_particle_index()); + } + + static ParticleIndexKey get_provenance_key(); +public: + + static bool get_is_setup(Model *m, ParticleIndex pi) { + return m->get_has_attribute(get_provenance_key(), pi); + } + + Provenance get_provenance() const { + ParticleIndex pi = get_model()->get_attribute(get_provenance_key(), + get_particle_index()); + return Provenance(get_model(), pi); + } + + void set_provenance(Provenance p) const { + get_model()->set_attribute(get_provenance_key(), get_particle_index(), + p.get_particle_index()); + } + + IMP_DECORATOR_METHODS(Provenanced, Decorator); + IMP_DECORATOR_SETUP_1(Provenanced, Provenance, p); +}; + +//! Add provenance to part of the model. +IMPCOREEXPORT void add_provenance(Model *m, ParticleIndex pi, + Provenance p); + +//! 
Clone provenance (including previous provenance) +IMPCOREEXPORT Provenance create_clone(Provenance p); + +IMPCORE_END_NAMESPACE + +#endif /* IMPCORE_PROVENANCE_H */ diff --git a/modules/core/pyext/swig.i-in b/modules/core/pyext/swig.i-in index b55fe442b5..af8207ef74 100644 --- a/modules/core/pyext/swig.i-in +++ b/modules/core/pyext/swig.i-in @@ -154,6 +154,13 @@ IMP_SWIG_DIRECTOR(IMP::core, HierarchyVisitor); IMP_SWIG_DECORATOR( IMP::core, Direction, Directions); IMP_SWIG_DECORATOR( IMP::core, DirectionAngle, DirectionAngles); IMP_SWIG_DECORATOR( IMP::core, Surface, Surfaces); +IMP_SWIG_DECORATOR(IMP::core, Provenance, Provenances); +IMP_SWIG_DECORATOR(IMP::core, StructureProvenance, StructureProvenances); +IMP_SWIG_DECORATOR(IMP::core, SampleProvenance, SampleProvenances); +IMP_SWIG_DECORATOR(IMP::core, ClusterProvenance, ClusterProvenances); +IMP_SWIG_DECORATOR(IMP::core, CombineProvenance, CombineProvenances); +IMP_SWIG_DECORATOR(IMP::core, FilterProvenance, FilterProvenances); +IMP_SWIG_DECORATOR(IMP::core, Provenanced, Provenanceds); IMP_SWIG_VALUE( IMP::core, HierarchyCounter, HierarchyCounters); IMP_SWIG_VALUE( IMP::core, HierarchyTraits, HierarchyTraitsList); @@ -348,6 +355,7 @@ void visit_depth_first(Hierarchy d, HierarchyVisitor *f) %include "IMP/core/model_statistics.h" %include "IMP/core/blame.h" %include "IMP/core/MultipleBinormalRestraint.h" +%include "IMP/core/provenance.h" %inline %{ namespace IMP { diff --git a/modules/core/src/provenance.cpp b/modules/core/src/provenance.cpp new file mode 100644 index 0000000000..80ad972eb9 --- /dev/null +++ b/modules/core/src/provenance.cpp @@ -0,0 +1,172 @@ +/** + * \file provenance.cpp + * \brief Classes to track how the model was created. + * + * Copyright 2007-2017 IMP Inventors. All rights reserved. 
+ */ + +#include + +IMPCORE_BEGIN_NAMESPACE + +namespace { + +Provenance clone_one(Provenance prov) { + Particle *p = new IMP::Particle(prov.get_model()); + p->set_name(prov->get_name()); + + if (StructureProvenance::get_is_setup(prov.get_particle())) { + StructureProvenance::setup_particle(p, + StructureProvenance(prov.get_particle())); + } else if (SampleProvenance::get_is_setup(prov.get_particle())) { + SampleProvenance::setup_particle(p, + SampleProvenance(prov.get_particle())); + } else if (CombineProvenance::get_is_setup(prov.get_particle())) { + CombineProvenance::setup_particle(p, + CombineProvenance(prov.get_particle())); + } else if (FilterProvenance::get_is_setup(prov.get_particle())) { + FilterProvenance::setup_particle(p, + FilterProvenance(prov.get_particle())); + } else if (ClusterProvenance::get_is_setup(prov.get_particle())) { + ClusterProvenance::setup_particle(p, + ClusterProvenance(prov.get_particle())); + } else { + IMP_THROW("Unhandled provenance", IOException); + } + return Provenance(p); +} + +} // anonymous namespace + +ParticleIndexKey Provenanced::get_provenance_key() { + static const ParticleIndexKey provenance("provenance"); + return provenance; +} + +void Provenanced::show(std::ostream &out) const { + out << "Provenanced" << std::endl; +} + +ParticleIndexKey Provenance::get_previous_key() { + static const ParticleIndexKey previous("previous_provenance"); + return previous; +} + +void Provenance::show(std::ostream &out) const { + out << "Provenance" << std::endl; +} + +StringKey StructureProvenance::get_filename_key() { + static const StringKey filename("sp_filename"); + return filename; +} + +StringKey StructureProvenance::get_chain_key() { + static const StringKey chain("sp_chain"); + return chain; +} + +void StructureProvenance::show(std::ostream &out) const { + out << "StructureProvenance " << get_filename() << " " << get_chain_id() + << std::endl; +} + +std::set& SampleProvenance::get_allowed_methods() { + static std::set m; + if 
(m.empty()) { + m.insert("Monte Carlo"); + m.insert("Molecular Dynamics"); + } + return m; +} + +StringKey SampleProvenance::get_method_key() { + static const StringKey method("sp_method"); + return method; +} + +IntKey SampleProvenance::get_frames_key() { + static const IntKey frames("sp_frames"); + return frames; +} + +IntKey SampleProvenance::get_iterations_key() { + static const IntKey iterations("sp_iterations"); + return iterations; +} + +void SampleProvenance::show(std::ostream &out) const { + out << "SampleProvenance " << get_number_of_frames() << " of " + << get_method() << std::endl; +} + +IntKey CombineProvenance::get_runs_key() { + static const IntKey runs("cp_runs"); + return runs; +} + +IntKey CombineProvenance::get_frames_key() { + static const IntKey frames("cp_frames"); + return frames; +} + +void CombineProvenance::show(std::ostream &out) const { + out << "CombineProvenance of " << get_number_of_runs() + << " runs resulting in " << get_number_of_frames() + << " frames" << std::endl; +} + +IntKey ClusterProvenance::get_members_key() { + static const IntKey members("cp_members"); + return members; +} + +void ClusterProvenance::show(std::ostream &out) const { + out << "ClusterProvenance with " << get_number_of_members() + << " members" << std::endl; +} + +FloatKey FilterProvenance::get_threshold_key() { + static const FloatKey threshold("fp_threshold"); + return threshold; +} + +IntKey FilterProvenance::get_frames_key() { + static const IntKey frames("fp_frames"); + return frames; +} + +void FilterProvenance::show(std::ostream &out) const { + out << "FilterProvenance threshold " << get_threshold() + << " resulting in " << get_number_of_frames() << " frames" << std::endl; +} + +void add_provenance(Model *m, ParticleIndex pi, Provenance p) { + if (Provenanced::get_is_setup(m, pi)) { + // add the new provenance as a new root + Provenanced pd(m, pi); + Provenance old_provenance = pd.get_provenance(); + p.set_previous(old_provenance); + 
pd.set_provenance(p); + } else { + Provenanced::setup_particle(m, pi, p); + } +} + +Provenance create_clone(Provenance prov) { + Provenance root = clone_one(prov); + + Provenance newprov = root; + while (prov) { + Provenance previous = prov.get_previous(); + if (previous) { + Provenance newprevious = clone_one(previous); + newprov.set_previous(newprevious); + newprov = newprevious; + } + prov = previous; + } + return root; +} + +IMPCORE_END_NAMESPACE diff --git a/modules/core/test/test_provenance.py b/modules/core/test/test_provenance.py new file mode 100644 index 0000000000..2c0aa10f60 --- /dev/null +++ b/modules/core/test/test_provenance.py @@ -0,0 +1,182 @@ +from __future__ import print_function +import IMP +import IMP.test +import IMP.core + +class Tests(IMP.test.TestCase): + + def test_provenance(self): + """Test Provenance decorator""" + m = IMP.Model() + p = IMP.core.Provenance.setup_particle(m, IMP.Particle(m)) + self.assertTrue(IMP.core.Provenance.get_is_setup(p)) + self.assertFalse(p.get_previous()) + + p2 = IMP.core.Provenance.setup_particle(m, IMP.Particle(m)) + p.set_previous(p2) + self.assertEqual(p.get_previous(), p2) + + def test_structure_provenance(self): + """Test StructureProvenance decorator""" + m = IMP.Model() + p = IMP.core.StructureProvenance.setup_particle(m, IMP.Particle(m), + "testfile", "testchain") + self.assertTrue(IMP.core.StructureProvenance.get_is_setup(p)) + self.assertEqual(p.get_filename(), "testfile") + self.assertEqual(p.get_chain_id(), "testchain") + + def test_sample_provenance(self): + """Test SampleProvenance decorator""" + m = IMP.Model() + p = IMP.core.SampleProvenance.setup_particle(m, IMP.Particle(m), + "Monte Carlo", 100, 5) + self.assertTrue(IMP.core.SampleProvenance.get_is_setup(p)) + self.assertEqual(p.get_method(), "Monte Carlo") + p.set_method("Molecular Dynamics") + self.assertEqual(p.get_method(), "Molecular Dynamics") + if IMP.get_check_level() == IMP.USAGE_AND_INTERNAL: + self.assertRaises(IMP.UsageError, 
p.set_method, "Garbage") + self.assertRaises(IMP.UsageError, + IMP.core.SampleProvenance.setup_particle, m, IMP.Particle(m), + "Garbage", 100, 5) + self.assertEqual(p.get_number_of_frames(), 100) + p.set_number_of_frames(200) + self.assertEqual(p.get_number_of_frames(), 200) + self.assertEqual(p.get_number_of_iterations(), 5) + p.set_number_of_iterations(42) + self.assertEqual(p.get_number_of_iterations(), 42) + + def test_combine_provenance(self): + """Test CombineProvenance decorator""" + m = IMP.Model() + p = IMP.core.CombineProvenance.setup_particle(m, IMP.Particle(m), 5, 42) + self.assertTrue(IMP.core.CombineProvenance.get_is_setup(p)) + self.assertEqual(p.get_number_of_runs(), 5) + p.set_number_of_runs(7) + self.assertEqual(p.get_number_of_runs(), 7) + self.assertEqual(p.get_number_of_frames(), 42) + p.set_number_of_frames(100) + self.assertEqual(p.get_number_of_frames(), 100) + + def test_filter_provenance(self): + """Test FilterProvenance decorator""" + m = IMP.Model() + p = IMP.core.FilterProvenance.setup_particle(m, IMP.Particle(m), 100.5, + 42) + self.assertTrue(IMP.core.FilterProvenance.get_is_setup(p)) + self.assertAlmostEqual(p.get_threshold(), 100.5, delta=0.01) + p.set_threshold(76.0) + self.assertAlmostEqual(p.get_threshold(), 76.0, delta=0.01) + self.assertEqual(p.get_number_of_frames(), 42) + p.set_number_of_frames(100) + self.assertEqual(p.get_number_of_frames(), 100) + + def test_cluster_provenance(self): + """Test ClusterProvenance decorator""" + m = IMP.Model() + p = IMP.core.ClusterProvenance.setup_particle(m, IMP.Particle(m), 10) + self.assertTrue(IMP.core.ClusterProvenance.get_is_setup(p)) + self.assertEqual(p.get_number_of_members(), 10) + p.set_number_of_members(42) + self.assertEqual(p.get_number_of_members(), 42) + + def test_provenanced(self): + """Test Provenanced decorator""" + m = IMP.Model() + p = IMP.core.Provenance.setup_particle(m, IMP.Particle(m)) + pd = IMP.core.Provenanced.setup_particle(m, IMP.Particle(m), p) + 
self.assertEqual(pd.get_provenance(), p) + pd.set_provenance(p) + self.assertEqual(pd.get_provenance(), p) + self.assertTrue(IMP.core.Provenanced.get_is_setup(pd)) + self.assertTrue(IMP.core.Provenance.get_is_setup(p)) + self.assertFalse(IMP.core.Provenanced.get_is_setup(p)) + self.assertFalse(IMP.core.Provenance.get_is_setup(pd)) + + def test_add_provenance(self): + """Test add_provenance()""" + m = IMP.Model() + prov1 = IMP.core.Provenance.setup_particle(m, IMP.Particle(m)) + prov2 = IMP.core.Provenance.setup_particle(m, IMP.Particle(m)) + + p = IMP.Particle(m) + self.assertFalse(IMP.core.Provenanced.get_is_setup(p)) + + IMP.core.add_provenance(m, p, prov1) + self.assertTrue(IMP.core.Provenanced.get_is_setup(p)) + pd = IMP.core.Provenanced(p) + self.assertEqual(pd.get_provenance(), prov1) + + IMP.core.add_provenance(m, p, prov2) + self.assertTrue(IMP.core.Provenanced.get_is_setup(p)) + self.assertEqual(pd.get_provenance(), prov2) + self.assertEqual(pd.get_provenance().get_previous(), prov1) + + def add_provenance(self, m): + struc = IMP.core.StructureProvenance.setup_particle( + m, IMP.Particle(m), "testfile", "testchain") + + samp = IMP.core.SampleProvenance.setup_particle( + m, IMP.Particle(m), "Monte Carlo", 100, 42) + samp.set_previous(struc) + + comb = IMP.core.CombineProvenance.setup_particle( + m, IMP.Particle(m), 4, 27) + comb.set_previous(samp) + + filt = IMP.core.FilterProvenance.setup_particle( + m, IMP.Particle(m), 100.5, 39) + filt.set_previous(comb) + + clus = IMP.core.ClusterProvenance.setup_particle(m, IMP.Particle(m), 10) + clus.set_previous(filt) + return clus + + def check_provenance(self, prov): + m = prov.get_model() + self.assertTrue(IMP.core.ClusterProvenance.get_is_setup(m, prov)) + clus = IMP.core.ClusterProvenance(m, prov) + self.assertEqual(clus.get_number_of_members(), 10) + + prov = prov.get_previous() + self.assertTrue(IMP.core.FilterProvenance.get_is_setup(m, prov)) + filt = IMP.core.FilterProvenance(m, prov) + 
self.assertAlmostEqual(filt.get_threshold(), 100.5, delta=1e-4) + self.assertEqual(filt.get_number_of_frames(), 39) + + prov = prov.get_previous() + self.assertTrue(IMP.core.CombineProvenance.get_is_setup(m, prov)) + comb = IMP.core.CombineProvenance(m, prov) + self.assertEqual(comb.get_number_of_runs(), 4) + self.assertEqual(comb.get_number_of_frames(), 27) + + prov = prov.get_previous() + self.assertTrue(IMP.core.SampleProvenance.get_is_setup(m, prov)) + samp = IMP.core.SampleProvenance(m, prov) + self.assertEqual(samp.get_method(), "Monte Carlo") + self.assertEqual(samp.get_number_of_frames(), 100) + self.assertEqual(samp.get_number_of_iterations(), 42) + + prov = prov.get_previous() + self.assertTrue(IMP.core.StructureProvenance.get_is_setup(m, prov)) + struc = IMP.core.StructureProvenance(m, prov) + self.assertEqual(struc.get_filename(), "testfile") + self.assertEqual(struc.get_chain_id(), "testchain") + + # Should be no more provenance + prov = prov.get_previous() + self.assertFalse(prov) + + def test_clone(self): + """Test create_clone""" + m = IMP.Model() + prov = self.add_provenance(m) + self.check_provenance(prov) + self.assertEqual(len(m.get_particle_indexes()), 5) + newprov = IMP.core.create_clone(prov) + self.assertEqual(len(m.get_particle_indexes()), 10) + self.check_provenance(newprov) + + +if __name__ == '__main__': + IMP.test.main() diff --git a/modules/rmf/dependency/RMF/.travis.yml b/modules/rmf/dependency/RMF/.travis.yml index b2cd003532..ba849a0e2d 100644 --- a/modules/rmf/dependency/RMF/.travis.yml +++ b/modules/rmf/dependency/RMF/.travis.yml @@ -26,9 +26,7 @@ script: - mkdir build - cd build - ../tools/coverage/setup.py - - PYTHON_INC=$(echo $(dirname $(which python))/../include/*/) - - PYTHON_LIB=$(echo /opt/python/2.7.*/lib/libpython2.7*.so) - - PYTHONPATH=`pwd`/coverage cmake .. 
-DCMAKE_BUILD_TYPE="$BUILD" -DCMAKE_CXX_FLAGS="$FLAGS" -DCMAKE_EXE_LINKER_FLAGS="$FLAGS" -DCMAKE_MODULE_LINKER_FLAGS="$FLAGS" -DCMAKE_SHARED_LINKER_FLAGS="$FLAGS" -DIMP_TEST_SETUP=$TEST_SETUP -DPYTHON_INCLUDE_DIR=$PYTHON_INC -DPYTHON_LIBRARY=$PYTHON_LIB + - PYTHONPATH=`pwd`/coverage cmake .. -DCMAKE_BUILD_TYPE="$BUILD" -DCMAKE_CXX_FLAGS="$FLAGS" -DCMAKE_EXE_LINKER_FLAGS="$FLAGS" -DCMAKE_MODULE_LINKER_FLAGS="$FLAGS" -DCMAKE_SHARED_LINKER_FLAGS="$FLAGS" -DIMP_TEST_SETUP=$TEST_SETUP - make -j 2 - export LD_PRELOAD=/lib/x86_64-linux-gnu/libSegFault.so - ctest -j 2 --output-on-failure -L $TESTS diff --git a/modules/rmf/dependency/RMF/ChangeLog.md b/modules/rmf/dependency/RMF/ChangeLog.md index 0fab052ae9..fa5227b1b2 100644 --- a/modules/rmf/dependency/RMF/ChangeLog.md +++ b/modules/rmf/dependency/RMF/ChangeLog.md @@ -1,6 +1,8 @@ Change Log {#changelog} ========== +- A new category of decorators 'provenance' allows information about how the + structure was generated to be added to the file. - The new RMF::decorator::Reference decorator allows for a node to mark another node as its reference. - The new RMF::decorator::ExplicitResolution decorator allows attaching an diff --git a/modules/rmf/dependency/RMF/doc/DecoratorsAndAttributes.md b/modules/rmf/dependency/RMF/doc/DecoratorsAndAttributes.md index bda6bc946c..98048e1e91 100644 --- a/modules/rmf/dependency/RMF/doc/DecoratorsAndAttributes.md +++ b/modules/rmf/dependency/RMF/doc/DecoratorsAndAttributes.md @@ -207,3 +207,34 @@ should be loaded as a child of the current node. | Name | Node Type | Attributes | |--------------------------------:|:-----------------------:|:-----------------------------------| | RMF::decorator::JournalArticle | RMF::ORGANIZATIONAL | title, journal, pubmed id, year, authors | + +# Provenance # {#provenance} + +The category name is `provenance`. It includes information about how the +structure was generated. 
+ +## Attributes ## {#provenanceattributes} + +| Name | Type | Description | +|----------------------:|-------------:|:------------------------------------------| +| `structure filename` | string | File from which the structure was read | +| `structure chain` | string | Chain ID of the structure that was read | +| `sampling method` | string | Sampling method utilized | +| `sampling frames` | int | Number of frames in the sample ensemble | +| `sampling iterations` | int | Number of sampling iterations used | +| `combined runs` | int | Number of sampling runs utilized | +| `combined frames` | int | Total number of frames combined | +| `filter threshold` | float | Score threshold to discard bad models | +| `filter frames` | int | Number of frames recorded for the filtering step | +| `cluster members` | int | Number of members in a cluster | + + +## Decorators ## {#provenancedecorators} + +| Name | Node Type | Attributes | +|------------------------------------:|:-------------------:|:------------------------------------------------------| +| RMF::decorator::StructureProvenance | RMF::PROVENANCE | structure filename, structure chain | +| RMF::decorator::SampleProvenance | RMF::PROVENANCE | sampling method, sampling frames, sampling iterations | +| RMF::decorator::CombineProvenance | RMF::PROVENANCE | combined runs, combined frames | +| RMF::decorator::FilterProvenance | RMF::PROVENANCE | filter threshold, filter frames | +| RMF::decorator::ClusterProvenance | RMF::PROVENANCE | cluster members | diff --git a/modules/rmf/dependency/RMF/doc/MappingToRMF.md b/modules/rmf/dependency/RMF/doc/MappingToRMF.md index eaa091ce6b..b9ccb9512a 100644 --- a/modules/rmf/dependency/RMF/doc/MappingToRMF.md +++ b/modules/rmf/dependency/RMF/doc/MappingToRMF.md @@ -15,7 +15,7 @@ For nodes types, the following node shapes are used } \enddot And a dot means an RMF::ALIAS node. The key concept to keep in mind with RMF::ALIAS nodes is that, unlike everything else, they don't introduce a new entity, simply point to one that exists elsewhere. 
-See RMF::ROOT, RMF::REPRESENTATION, RMF::BOND, RMF::ORGANIZATIONAL, RMF::FEATURE for more information. +See RMF::ROOT, RMF::REPRESENTATION, RMF::BOND, RMF::ORGANIZATIONAL, RMF::PROVENANCE, RMF::FEATURE for more information. # PDB files # diff --git a/modules/rmf/dependency/RMF/include/RMF/decorators.h b/modules/rmf/dependency/RMF/include/RMF/decorators.h index de8d0e2007..b18db3e77e 100644 --- a/modules/rmf/dependency/RMF/include/RMF/decorators.h +++ b/modules/rmf/dependency/RMF/include/RMF/decorators.h @@ -16,6 +16,7 @@ #include "RMF/decorator/feature.h" #include "RMF/decorator/physics.h" #include "RMF/decorator/publication.h" +#include "RMF/decorator/provenance.h" #include "RMF/decorator/sequence.h" #include "RMF/decorator/shape.h" #include "RMF/decorator/bond.h" diff --git a/modules/rmf/dependency/RMF/include/RMF/enums.h b/modules/rmf/dependency/RMF/include/RMF/enums.h index 926486b681..22020ad424 100644 --- a/modules/rmf/dependency/RMF/include/RMF/enums.h +++ b/modules/rmf/dependency/RMF/include/RMF/enums.h @@ -57,6 +57,8 @@ extern RMFEXPORT const NodeType BOND; /** This includes nodes that are just RMF::ReferenceFrame nodes. */ extern RMFEXPORT const NodeType ORGANIZATIONAL; +//! Represent the process by which a structure was created +extern RMFEXPORT const NodeType PROVENANCE; #ifndef RMF_DOXYGEN //! 
An internal link to another node extern RMFEXPORT const NodeType LINK; diff --git a/modules/rmf/dependency/RMF/src/enums.cpp b/modules/rmf/dependency/RMF/src/enums.cpp index d9fdb1ae78..61a83f4a0e 100644 --- a/modules/rmf/dependency/RMF/src/enums.cpp +++ b/modules/rmf/dependency/RMF/src/enums.cpp @@ -24,6 +24,7 @@ const NodeType CUSTOM(5, "custom"); const NodeType BOND(6, "bond"); const NodeType ORGANIZATIONAL(7, "organizational"); const NodeType LINK(8, "link"); +const NodeType PROVENANCE(9, "provenance"); const FrameType INVALID_FRAME_TYPE(-1, "inv"); const FrameType STATIC(0, "static"); diff --git a/modules/rmf/dependency/RMF/src/show_hierarchy.cpp b/modules/rmf/dependency/RMF/src/show_hierarchy.cpp index 74aacfc810..3fee6acb08 100644 --- a/modules/rmf/dependency/RMF/src/show_hierarchy.cpp +++ b/modules/rmf/dependency/RMF/src/show_hierarchy.cpp @@ -18,6 +18,7 @@ #include "RMF/decorator/alternatives.h" #include "RMF/decorator/shape.h" #include "RMF/decorator/reference.h" +#include "RMF/decorator/provenance.h" #include "RMF/enums.h" #include "RMF/infrastructure_macros.h" #include "RMF/types.h" @@ -175,6 +176,11 @@ void show_node_decorators( decorator::ChainFactory chaincf, decorator::DomainFactory fragcf, decorator::CopyFactory copycf, decorator::DiffuserFactory diffusercf, decorator::TypedFactory typedcf, decorator::ReferenceFactory refcf, + decorator::StructureProvenanceFactory strucpcf, + decorator::SampleProvenanceFactory samppcf, + decorator::CombineProvenanceFactory combpcf, + decorator::FilterProvenanceFactory filtpcf, + decorator::ClusterProvenanceFactory clustpcf, std::string) { using std::operator<<; out << "\"" << n.get_name() << "\"" << node_suffix << " [" << n.get_type() @@ -219,6 +225,16 @@ void show_node_decorators( else if (diffusercf.get_is(n)) out << " diffuser"; if (refcf.get_is_static(n)) out << " reference(s)"; else if (refcf.get_is(n)) out << " reference"; + if (strucpcf.get_is_static(n)) out << " structure provenance(s)"; + else if 
(strucpcf.get_is(n)) out << " structure provenance"; + if (samppcf.get_is_static(n)) out << " sample provenance(s)"; + else if (samppcf.get_is(n)) out << " sample provenance"; + if (combpcf.get_is_static(n)) out << " combine provenance(s)"; + else if (combpcf.get_is(n)) out << " combine provenance"; + if (filtpcf.get_is_static(n)) out << " filter provenance(s)"; + else if (filtpcf.get_is(n)) out << " filter provenance"; + if (clustpcf.get_is_static(n)) out << " cluster provenance(s)"; + else if (clustpcf.get_is(n)) out << " cluster provenance"; out << "]"; } @@ -278,6 +294,11 @@ struct ShowDecorators { decorator::DiffuserFactory diffusercf; decorator::TypedFactory typedcf; decorator::ReferenceFactory refcf; + decorator::StructureProvenanceFactory strucpcf; + decorator::SampleProvenanceFactory samppcf; + decorator::CombineProvenanceFactory combpcf; + decorator::FilterProvenanceFactory filtpcf; + decorator::ClusterProvenanceFactory clustpcf; ShowDecorators(FileConstHandle fh) : bdf(fh), ccf(fh), @@ -296,12 +317,18 @@ struct ShowDecorators { copycf(fh), diffusercf(fh), typedcf(fh), - refcf(fh) {} + refcf(fh), + strucpcf(fh), + samppcf(fh), + combpcf(fh), + filtpcf(fh), + clustpcf(fh) {} void operator()(NodeConstHandle cur, std::string prefix, std::string suffix, std::ostream& out) { show_node_decorators(cur, suffix, out, bdf, ccf, pcf, ipcf, rpcf, scf, repcf, bcf, cycf, segcf, rcf, acf, chaincf, fragcf, - copycf, diffusercf, typedcf, refcf, prefix + " "); + copycf, diffusercf, typedcf, refcf, strucpcf, + samppcf, combpcf, filtpcf, clustpcf, prefix + " "); } }; } diff --git a/modules/rmf/dependency/RMF/src/signature.cpp b/modules/rmf/dependency/RMF/src/signature.cpp index acb167c740..b1cce897de 100644 --- a/modules/rmf/dependency/RMF/src/signature.cpp +++ b/modules/rmf/dependency/RMF/src/signature.cpp @@ -20,6 +20,7 @@ #include "RMF/decorator/feature.h" #include "RMF/decorator/bond.h" #include "RMF/decorator/reference.h" +#include "RMF/decorator/provenance.h" #include 
"RMF/decorator/shape.h" RMF_ENABLE_WARNINGS diff --git a/modules/rmf/dependency/RMF/swig/RMF.decorator.i b/modules/rmf/dependency/RMF/swig/RMF.decorator.i index 4d16584c8d..568290cdde 100644 --- a/modules/rmf/dependency/RMF/swig/RMF.decorator.i +++ b/modules/rmf/dependency/RMF/swig/RMF.decorator.i @@ -10,3 +10,4 @@ %include "RMF/decorator/bond.h" %include "RMF/decorator/labels.h" %include "RMF/decorator/reference.h" +%include "RMF/decorator/provenance.h" diff --git a/modules/rmf/dependency/RMF/test/test_provenance.py b/modules/rmf/dependency/RMF/test/test_provenance.py new file mode 100644 index 0000000000..7f6e0ff6dd --- /dev/null +++ b/modules/rmf/dependency/RMF/test/test_provenance.py @@ -0,0 +1,78 @@ +from __future__ import print_function +import sys +import RMF +import unittest +import os + +class Tests(unittest.TestCase): + + def test_provenance(self): + """Test the Provenance decorator""" + for suffix in RMF.suffixes: + fname = RMF._get_temporary_file_path("provenance." + suffix) + self._create(fname) + self._read(fname) + + def _add_provenance_nodes(self, rmf, rt): + """Add *Provenance nodes under rt. 
Return the root.""" + strucpf = RMF.StructureProvenanceFactory(rmf) + samppf = RMF.SampleProvenanceFactory(rmf) + clustpf = RMF.ClusterProvenanceFactory(rmf) + + clust_node = rt.add_child("clustering", RMF.PROVENANCE) + clust = clustpf.get(clust_node) + clust.set_members(10) + + samp_node = clust_node.add_child("sampling", RMF.PROVENANCE) + samp = samppf.get(samp_node) + samp.set_method("Monte Carlo") + samp.set_frames(100) + samp.set_iterations(10) + + struc_node = samp_node.add_child("structure", RMF.PROVENANCE) + struc = strucpf.get(struc_node) + struc.set_filename('foo.pdb') + struc.set_chain('X') + + return clust_node + + def _create(self, fname): + rmf = RMF.create_rmf_file(fname) + rmf.add_frame('zero', RMF.FRAME) + rt = rmf.get_root_node() + + c1 = rt.add_child("c1", RMF.REPRESENTATION) + c0 = self._add_provenance_nodes(rmf, c1) + self._check_provenance_nodes(rmf, c0) + + def _read(self, fname): + rmf = RMF.open_rmf_file_read_only(fname) + rt = rmf.get_root_node() + c1, = rt.get_children() + c0, = c1.get_children() + self._check_provenance_nodes(rmf, c0) + + def _check_provenance_nodes(self, rmf, prov_root): + strucpf = RMF.StructureProvenanceFactory(rmf) + samppf = RMF.SampleProvenanceFactory(rmf) + clustpf = RMF.ClusterProvenanceFactory(rmf) + + self.assertTrue(clustpf.get_is(prov_root)) + clust = clustpf.get(prov_root) + self.assertEqual(clust.get_members(), 10) + + samp_node = prov_root.get_children()[0] + self.assertTrue(samppf.get_is(samp_node)) + samp = samppf.get(samp_node) + self.assertEqual(samp.get_frames(), 100) + self.assertEqual(samp.get_iterations(), 10) + self.assertEqual(samp.get_method(), "Monte Carlo") + + struc_node = samp_node.get_children()[0] + self.assertTrue(strucpf.get_is(struc_node)) + struc = strucpf.get(struc_node) + self.assertEqual(struc.get_filename(), 'foo.pdb') + self.assertEqual(struc.get_chain(), 'X') + +if __name__ == '__main__': + unittest.main() diff --git a/modules/rmf/dependency/RMF/tools/build/_decorators.py 
b/modules/rmf/dependency/RMF/tools/build/_decorators.py index c965ce28d7..defb3376f4 100755 --- a/modules/rmf/dependency/RMF/tools/build/_decorators.py +++ b/modules/rmf/dependency/RMF/tools/build/_decorators.py @@ -75,7 +75,9 @@ class Attribute(Base): def __init__(self, name, attribute_type, function_name=None, allow_null=False): if not function_name: - function_name = name.replace(" ", "_") + self.function_name = name.replace(" ", "_") + else: + self.function_name = function_name Base.__init__(self, name, attribute_type + "Key", attribute_type) self.get_methods = """ @@ -94,7 +96,7 @@ def __init__(self, name, attribute_type, function_name=None, return get_node().GET_STATIC(NAME_); } RMF_DECORATOR_CATCH( ); } -""" % (function_name, function_name, function_name) +""" % (self.function_name, self.function_name, self.function_name) # Note that this currently only works for string attributes if allow_null: self.get_methods = self.get_methods.replace('return', @@ -115,7 +117,7 @@ def __init__(self, name, attribute_type, function_name=None, get_node().SET_STATIC(NAME_, v); } RMF_DECORATOR_CATCH( ); } -""" % (function_name, function_name, function_name) +""" % (self.function_name, self.function_name, self.function_name) # If the attribute is allowed to be null, skip check if allow_null: self.check = "" @@ -147,26 +149,26 @@ def __init__(self, name): class PathAttribute(Attribute): - def __init__(self, name): - Attribute.__init__(self, name, "String") + def __init__(self, name, function_name=None): + Attribute.__init__(self, name, "String", function_name) self.get_methods = """ - String get_NAME() const { + String get_%s() const { try { String relpath = get_node().GET_BOTH(NAME_); String filename = get_node().get_file().get_path(); return internal::get_absolute_path(filename, relpath); } RMF_DECORATOR_CATCH( ); } -""" +""" % self.function_name self.set_methods = """ - void set_NAME(String path) { + void set_%s(String path) { try { String filename = 
get_node().get_file().get_path(); String relpath = internal::get_relative_path(filename, path); get_node().SET_BOTH(NAME_, relpath); } RMF_DECORATOR_CATCH( ); } -""" +""" % self.function_name class AttributePair(Base): diff --git a/modules/rmf/dependency/RMF/tools/build/make_decorators.py b/modules/rmf/dependency/RMF/tools/build/make_decorators.py index d25b9ba2cb..74480a3699 100755 --- a/modules/rmf/dependency/RMF/tools/build/make_decorators.py +++ b/modules/rmf/dependency/RMF/tools/build/make_decorators.py @@ -103,6 +103,44 @@ make_header("publication", [journal], []) +structure = Decorator(["PROVENANCE"], "provenance", + "StructureProvenance", + # Note that this should really be PathAttribute, + # but that currently requires that the file exists, + # otherwise reading the RMF file will fail + [Attribute("structure filename", "String", + function_name='filename'), + Attribute("structure chain", "String", + function_name='chain')]) + +sample = Decorator(["PROVENANCE"], "provenance", + "SampleProvenance", + [Attribute("sampling method", "String", + function_name='method'), + Attribute("sampling frames", "Int", + function_name='frames'), + Attribute("sampling iterations", "Int", + function_name='iterations')]) + +combine = Decorator(["PROVENANCE"], "provenance", + "CombineProvenance", + [Attribute("combined runs", "Int", function_name='runs'), + Attribute("combined frames", "Int", + function_name='frames')]) + +filterp = Decorator(["PROVENANCE"], "provenance", + "FilterProvenance", + [Attribute("filter threshold", "Float", + function_name='threshold'), + Attribute("filter frames", "Int", function_name='frames')]) + +cluster = Decorator(["PROVENANCE"], "provenance", + "ClusterProvenance", + [Attribute("cluster members", "Int", + function_name='members')]) + +make_header("provenance", + [structure, sample, combine, filterp, cluster], []) residue = Decorator(["REPRESENTATION"], "sequence", "Residue", diff --git a/modules/rmf/include/atom_links.h 
b/modules/rmf/include/atom_links.h index e1d27abb8b..878dba3298 100644 --- a/modules/rmf/include/atom_links.h +++ b/modules/rmf/include/atom_links.h @@ -16,11 +16,13 @@ #include "internal/atom_links_xyzs.h" #include "internal/atom_links_gaussians.h" #include +#include #include #include #include #include #include +#include #include #include #include @@ -37,6 +39,11 @@ class IMPRMFEXPORT HierarchyLoadLink : public SimpleLoadLink { RMF::decorator::IntermediateParticleFactory intermediate_particle_factory_; RMF::decorator::ReferenceFrameFactory reference_frame_factory_; RMF::decorator::AlternativesFactory af_; + RMF::decorator::StructureProvenanceFactory strucpf_; + RMF::decorator::SampleProvenanceFactory samppf_; + RMF::decorator::CombineProvenanceFactory combpf_; + RMF::decorator::FilterProvenanceFactory filtpf_; + RMF::decorator::ClusterProvenanceFactory clustpf_; RMF::decorator::ExplicitResolutionFactory explicit_resolution_factory_; RMF::IntKey external_rigid_body_key_; struct Data { @@ -72,6 +79,14 @@ class IMPRMFEXPORT HierarchyLoadLink : public SimpleLoadLink { void create_recursive(Model *m, ParticleIndex root, ParticleIndex cur, RMF::NodeConstHandle name, ParticleIndexes rigid_bodies, Data &data); + + // Make tree of *Provenance IMP particles corresponding to those in the RMF + void create_provenance(Model *m, RMF::NodeConstHandle node, + ParticleIndex cur); + + // Make *Provenance IMP particle corresponding to that in the RMF + core::Provenance create_one_provenance(Model *m, RMF::NodeConstHandle node); + virtual void do_load_one(RMF::NodeConstHandle nh, Particle *o) IMP_FINAL IMP_OVERRIDE; @@ -138,6 +153,11 @@ class IMPRMFEXPORT HierarchySaveLink : public SimpleSaveLink { DM; DM data_; RMF::decorator::AlternativesFactory af_; + RMF::decorator::StructureProvenanceFactory strucpf_; + RMF::decorator::SampleProvenanceFactory samppf_; + RMF::decorator::CombineProvenanceFactory combpf_; + RMF::decorator::FilterProvenanceFactory filtpf_; + 
RMF::decorator::ClusterProvenanceFactory clustpf_; RMF::decorator::ExplicitResolutionFactory explicit_resolution_factory_; RMF::IntKey external_rigid_body_key_; @@ -145,6 +165,10 @@ class IMPRMFEXPORT HierarchySaveLink : public SimpleSaveLink { ParticleIndex p, ParticleIndexes rigid_bodies, RMF::NodeHandle cur, Data &data); + + // Make RMF PROVENANCE nodes corresponding to those in IMP + void add_provenance(Model *m, ParticleIndex p, RMF::NodeHandle cur); + virtual void do_add(Particle *p, RMF::NodeHandle cur) IMP_OVERRIDE; virtual void do_save_one(Particle *o, RMF::NodeHandle nh) IMP_OVERRIDE; diff --git a/modules/rmf/src/atom_links.cpp b/modules/rmf/src/atom_links.cpp index 5f5d29f574..0935202880 100644 --- a/modules/rmf/src/atom_links.cpp +++ b/modules/rmf/src/atom_links.cpp @@ -40,8 +40,28 @@ std::string get_good_name(Model *m, ParticleIndex h) { return m->get_particle_name(h); } } + +// Get the 'child' provenance from the RMF, or a default-constructed +// object if no children exist +RMF::NodeConstHandle get_previous_rmf_provenance(RMF::NodeConstHandle node) { + RMF::NodeConstHandles nchs; + IMP_FOREACH(RMF::NodeConstHandle ch, node.get_children()) { + if (ch.get_type() == RMF::PROVENANCE) { + nchs.push_back(ch); + } + } + if (nchs.size() > 1) { + IMP_THROW("RMF provenance hierarchy has more than one child at " << node, + IOException); + } else if (nchs.empty()) { + return RMF::NodeConstHandle(); + } else { + return nchs[0]; + } } +} // anonymous namespace + void HierarchyLoadLink::do_load_one(RMF::NodeConstHandle nh, Particle *o) { data_.find(o->get_index()) @@ -118,6 +138,11 @@ void HierarchyLoadLink::create_recursive(Model *m, atom::Hierarchy(m, cur) .add_child(atom::Hierarchy::setup_particle(m, child)); } + } else if (ch.get_type() == RMF::PROVENANCE) { + // Note that at most only one such node should be encountered. 
If more + // than one is found, this is an error (and Provenanced::setup_particle() + // will throw) + create_provenance(m, ch, cur); } } do_setup_particle(m, root, cur, name); @@ -143,6 +168,50 @@ void HierarchyLoadLink::create_recursive(Model *m, } } +// Make *Provenance IMP particles corresponding to those in the RMF +void HierarchyLoadLink::create_provenance(Model *m, RMF::NodeConstHandle node, + ParticleIndex cur) { + core::Provenance prov = create_one_provenance(m, node); + core::Provenanced provd = core::Provenanced::setup_particle(m, cur, prov); + + while ((node = get_previous_rmf_provenance(node)) != RMF::NodeConstHandle()) { + core::Provenance thisprov = create_one_provenance(m, node); + prov.set_previous(thisprov); + prov = thisprov; + } +} + +core::Provenance HierarchyLoadLink::create_one_provenance(Model *m, + RMF::NodeConstHandle node) { + if (strucpf_.get_is(node)) { + RMF::decorator::StructureProvenanceConst rp = strucpf_.get(node); + ParticleIndex ip = m->add_particle(node.get_name()); + return core::StructureProvenance::setup_particle(m, ip, rp.get_filename(), + rp.get_chain()); + } else if (samppf_.get_is(node)) { + RMF::decorator::SampleProvenanceConst rp = samppf_.get(node); + ParticleIndex ip = m->add_particle(node.get_name()); + return core::SampleProvenance::setup_particle(m, ip, rp.get_method(), + rp.get_frames(), rp.get_iterations()); + } else if (combpf_.get_is(node)) { + RMF::decorator::CombineProvenanceConst rp = combpf_.get(node); + ParticleIndex ip = m->add_particle(node.get_name()); + return core::CombineProvenance::setup_particle(m, ip, rp.get_runs(), + rp.get_frames()); + } else if (filtpf_.get_is(node)) { + RMF::decorator::FilterProvenanceConst rp = filtpf_.get(node); + ParticleIndex ip = m->add_particle(node.get_name()); + return core::FilterProvenance::setup_particle(m, ip, rp.get_threshold(), + rp.get_frames()); + } else if (clustpf_.get_is(node)) { + RMF::decorator::ClusterProvenanceConst rp = clustpf_.get(node); + 
ParticleIndex ip = m->add_particle(node.get_name()); + return core::ClusterProvenance::setup_particle(m, ip, rp.get_members()); + } else { + IMP_THROW("Unhandled provenance type " << node, IOException); + } +} + Particle *HierarchyLoadLink::do_create(RMF::NodeConstHandle node, Model *m) { IMP_FUNCTION_LOG; @@ -247,7 +316,8 @@ HierarchyLoadLink::HierarchyLoadLink(RMF::FileConstHandle fh) : P("HierarchyLoadLink%1%"), intermediate_particle_factory_(fh), reference_frame_factory_(fh), - af_(fh), + af_(fh), strucpf_(fh), samppf_(fh), + combpf_(fh), filtpf_(fh), clustpf_(fh), explicit_resolution_factory_(fh) { RMF::Category imp_cat = fh.get_category("IMP"); external_rigid_body_key_ = @@ -347,6 +417,10 @@ void HierarchySaveLink::add_recursive(Model *m, ParticleIndex root, } } + if (core::Provenanced::get_is_setup(m, p)) { + add_provenance(m, p, cur); + } + if (!prep_nodes.empty() || !grep_nodes.empty()) { RMF::decorator::Alternatives ad = af_.get(cur); IMP_FOREACH(RMF::NodeHandle nh, prep_nodes) { @@ -358,8 +432,59 @@ void HierarchySaveLink::add_recursive(Model *m, ParticleIndex root, } } +// Make RMF PROVENANCE nodes corresponding to those in IMP +void HierarchySaveLink::add_provenance(Model *m, ParticleIndex p, + RMF::NodeHandle cur) { + core::Provenanced provd(m, p); + for (core::Provenance prov = provd.get_provenance(); prov; + prov = prov.get_previous()) { + if (core::StructureProvenance::get_is_setup(prov)) { + core::StructureProvenance ip(prov); + cur = cur.add_child(m->get_particle_name(prov.get_particle_index()), + RMF::PROVENANCE); + RMF::decorator::StructureProvenance rp = strucpf_.get(cur); + rp.set_filename(ip.get_filename()); + rp.set_chain(ip.get_chain_id()); + } else if (core::SampleProvenance::get_is_setup(prov)) { + core::SampleProvenance ip(prov); + cur = cur.add_child(m->get_particle_name(prov.get_particle_index()), + RMF::PROVENANCE); + RMF::decorator::SampleProvenance rp = samppf_.get(cur); + rp.set_method(ip.get_method()); + 
rp.set_frames(ip.get_number_of_frames()); + rp.set_iterations(ip.get_number_of_iterations()); + } else if (core::CombineProvenance::get_is_setup(prov)) { + core::CombineProvenance ip(prov); + cur = cur.add_child(m->get_particle_name(prov.get_particle_index()), + RMF::PROVENANCE); + RMF::decorator::CombineProvenance rp = combpf_.get(cur); + rp.set_runs(ip.get_number_of_runs()); + rp.set_frames(ip.get_number_of_frames()); + } else if (core::FilterProvenance::get_is_setup(prov)) { + core::FilterProvenance ip(prov); + cur = cur.add_child(m->get_particle_name(prov.get_particle_index()), + RMF::PROVENANCE); + RMF::decorator::FilterProvenance rp = filtpf_.get(cur); + rp.set_threshold(ip.get_threshold()); + rp.set_frames(ip.get_number_of_frames()); + } else if (core::ClusterProvenance::get_is_setup(prov)) { + core::ClusterProvenance ip(prov); + cur = cur.add_child(m->get_particle_name(prov.get_particle_index()), + RMF::PROVENANCE); + RMF::decorator::ClusterProvenance rp = clustpf_.get(cur); + rp.set_members(ip.get_number_of_members()); + } else { + IMP_THROW("Unhandled provenance type " + << m->get_particle_name(prov.get_particle_index()), + IOException); + } + } +} + HierarchySaveLink::HierarchySaveLink(RMF::FileHandle f) - : P("HierarchySaveLink%1%"), af_(f), explicit_resolution_factory_(f) { + : P("HierarchySaveLink%1%"), af_(f), strucpf_(f), samppf_(f), + combpf_(f), filtpf_(f), clustpf_(f), + explicit_resolution_factory_(f) { RMF::Category imp_cat = f.get_category("IMP"); external_rigid_body_key_ = f.get_key(imp_cat, "external frame", RMF::IntTraits()); diff --git a/modules/rmf/test/test_provenance.py b/modules/rmf/test/test_provenance.py new file mode 100644 index 0000000000..ae44985b98 --- /dev/null +++ b/modules/rmf/test/test_provenance.py @@ -0,0 +1,102 @@ +from __future__ import print_function +import unittest +import IMP.rmf +import IMP.test +import RMF + + +class Tests(IMP.test.TestCase): + + def add_provenance(self, h): + m = h.get_model() + struc = 
IMP.core.StructureProvenance.setup_particle( + m, IMP.Particle(m), "testfile", "testchain") + struc.set_name("structure provenance") + IMP.core.add_provenance(m, h, struc) + + samp = IMP.core.SampleProvenance.setup_particle( + m, IMP.Particle(m), "Monte Carlo", 100, 42) + IMP.core.add_provenance(m, h, samp) + + comb = IMP.core.CombineProvenance.setup_particle( + m, IMP.Particle(m), 4, 27) + IMP.core.add_provenance(m, h, comb) + + filt = IMP.core.FilterProvenance.setup_particle( + m, IMP.Particle(m), 100.5, 39) + IMP.core.add_provenance(m, h, filt) + + clus = IMP.core.ClusterProvenance.setup_particle(m, IMP.Particle(m), 10) + IMP.core.add_provenance(m, h, clus) + + def check_provenance(self, h): + m = h.get_model() + + # Test IMP-added chain provenance + chain, = IMP.atom.get_by_type(h, IMP.atom.CHAIN_TYPE) + self.assertTrue(IMP.core.Provenanced.get_is_setup(m, chain)) + prov = IMP.core.Provenanced(m, chain).get_provenance() + self.assertTrue(IMP.core.StructureProvenance.get_is_setup(m, prov)) + struc = IMP.core.StructureProvenance(m, prov) + self.assertEqual(struc.get_chain_id(), 'A') + + # Should be no more chain provenance + prov = prov.get_previous() + self.assertFalse(prov) + + # Check the provenance we added at the top level + self.assertTrue(IMP.core.Provenanced.get_is_setup(m, h)) + prov = IMP.core.Provenanced(m, h).get_provenance() + + self.assertTrue(IMP.core.ClusterProvenance.get_is_setup(m, prov)) + clus = IMP.core.ClusterProvenance(m, prov) + self.assertEqual(clus.get_number_of_members(), 10) + + prov = prov.get_previous() + self.assertTrue(IMP.core.FilterProvenance.get_is_setup(m, prov)) + filt = IMP.core.FilterProvenance(m, prov) + self.assertAlmostEqual(filt.get_threshold(), 100.5, delta=1e-4) + self.assertEqual(filt.get_number_of_frames(), 39) + + prov = prov.get_previous() + self.assertTrue(IMP.core.CombineProvenance.get_is_setup(m, prov)) + comb = IMP.core.CombineProvenance(m, prov) + self.assertEqual(comb.get_number_of_runs(), 4) + 
self.assertEqual(comb.get_number_of_frames(), 27) + + prov = prov.get_previous() + self.assertTrue(IMP.core.SampleProvenance.get_is_setup(m, prov)) + samp = IMP.core.SampleProvenance(m, prov) + self.assertEqual(samp.get_method(), "Monte Carlo") + self.assertEqual(samp.get_number_of_frames(), 100) + self.assertEqual(samp.get_number_of_iterations(), 42) + + prov = prov.get_previous() + self.assertTrue(IMP.core.StructureProvenance.get_is_setup(m, prov)) + struc = IMP.core.StructureProvenance(m, prov) + self.assertEqual(struc.get_filename(), "testfile") + self.assertEqual(struc.get_chain_id(), "testchain") + self.assertEqual(struc.get_name(), "structure provenance") + + # Should be no more provenance + prov = prov.get_previous() + self.assertFalse(prov) + + def test_rt(self): + """Test that provenance info can be stored in RMF files""" + for suffix in [".rmfz", ".rmf3"]: + m = IMP.Model() + name = self.get_tmp_file_name("test_provenance" + suffix) + h = IMP.atom.read_pdb(self.get_input_file_name("simple.pdb"), m, + IMP.atom.NonAlternativePDBSelector()) + self.add_provenance(h) + f = RMF.create_rmf_file(name) + IMP.rmf.add_hierarchy(f, h) + IMP.rmf.save_frame(f, "0") + del f + f = RMF.open_rmf_file_read_only(name) + h2 = IMP.rmf.create_hierarchies(f, m) + self.check_provenance(h2[0]) + +if __name__ == '__main__': + IMP.test.main()