diff --git a/boost/numeric/odeint/external/openmp/openmp_algebra.hpp b/boost/numeric/odeint/external/openmp/openmp_algebra.hpp index 333c18be..511d3855 100644 --- a/boost/numeric/odeint/external/openmp/openmp_algebra.hpp +++ b/boost/numeric/odeint/external/openmp/openmp_algebra.hpp @@ -20,26 +20,27 @@ #include #include +#include "openmp_state.hpp" namespace boost { namespace numeric { namespace odeint { -/** \brief OpenMP-parallelized algebra. +/** \brief Basic OpenMP-parallelized algebra. * * Requires `s.size()` and `s[n]`, i.e. a Random Access Container. */ -struct openmp_algebra +struct basic_openmp_algebra { // FIXME: _Pragma is C++11. -#define OPENMP_ALGEBRA(n) \ +#define BOOST_ODEINT_GEN_BODY(n) \ const size_t len = s0.size(); \ - _Pragma("omp parallel for") \ + _Pragma("omp parallel for schedule(dynamic)") \ for( size_t i = 0 ; i < len ; i++ ) \ op( BOOST_PP_ENUM_BINARY_PARAMS(n, s, [i] BOOST_PP_INTERCEPT) ); -BOOST_ODEINT_GEN_FOR_EACH(OPENMP_ALGEBRA) -#undef OPENMP_ALGEBRA +BOOST_ODEINT_GEN_FOR_EACH(BOOST_ODEINT_GEN_BODY) +#undef BOOST_ODEINT_GEN_BODY template< class S > @@ -58,6 +59,40 @@ BOOST_ODEINT_GEN_FOR_EACH(OPENMP_ALGEBRA) }; +/** \brief OpenMP-parallelized algebra, wrapping another, non-parallelized algebra. + */ +template< class InnerAlgebra > +struct openmp_algebra +{ + +// FIXME: _Pragma is C++11. +#define BOOST_ODEINT_GEN_BODY(n) \ + const size_t len = s0.size(); \ + _Pragma("omp parallel for schedule(static,1)") \ + for( size_t i = 0 ; i < len ; i++ ) \ + InnerAlgebra::for_each##n( \ + BOOST_PP_ENUM_BINARY_PARAMS(n, s, [i] BOOST_PP_INTERCEPT) \ + ); +BOOST_ODEINT_GEN_FOR_EACH(BOOST_ODEINT_GEN_BODY) +#undef BOOST_ODEINT_GEN_BODY + + + template< class InnerState > + static typename norm_result_type< T >::type norm_inf( const openmp_state< InnerState > &s ) + { + using std::max; + using std::abs; + typedef typename norm_result_type< S >::type result_type; + result_type init = static_cast< result_type >( 0 ); +# pragma omp parallel for reduction(max: init) schedule(static,1) + for( size_t i = 0 ; i < s.size() ; i++ ) + init = max( init , InnerAlgebra::norm_inf( s[i] ) ); + return init; + } + +} + + } } } diff --git a/boost/numeric/odeint/external/openmp/openmp_algebra_dispatcher.hpp b/boost/numeric/odeint/external/openmp/openmp_algebra_dispatcher.hpp new file mode 100644 index 00000000..775756cb --- /dev/null +++ b/boost/numeric/odeint/external/openmp/openmp_algebra_dispatcher.hpp @@ -0,0 +1,40 @@ +/* + [auto_generated] + boost/numeric/odeint/external/openmp/openmp_algebra_dispatcher.hpp + + [begin_description] + Algebra dispatcher to automatically chose suitable algebra. + [end_description] + + Copyright 2009-2013 Karsten Ahnert + Copyright 2009-2013 Mario Mulansky + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or + copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#ifndef BOOST_NUMERIC_ODEINT_OPENMP_OPENMP_ALGEBRA_DISPATCHER_HPP_INCLUDED +#define BOOST_NUMERIC_ODEINT_OPENMP_OPENMP_ALGEBRA_DISPATCHER_HPP_INCLUDED + +#include +#include +#include "openmp_state.hpp" + +namespace boost { +namespace numeric { +namespace odeint { + +template< class Inner > +struct algebra_dispatcher< openmp_state< Inner > > +{ + typedef openmp_algebra< + algebra_dispatcher< Inner >::algebra_type + > algebra_type; +}; + +} +} +} + +#endif diff --git a/boost/numeric/odeint/external/openmp/openmp_resize.hpp b/boost/numeric/odeint/external/openmp/openmp_resize.hpp new file mode 100644 index 00000000..d001f412 --- /dev/null +++ b/boost/numeric/odeint/external/openmp/openmp_resize.hpp @@ -0,0 +1,80 @@ +/* + [auto_generated] + boost/numeric/odeint/external/openmp/openmp_resize.hpp + + [begin_description] + Delegate resizing of OpenMP-state to inner state. + [end_description] + + Copyright 2009-2011 Karsten Ahnert + Copyright 2009-2011 Mario Mulansky + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or + copy at http://www.boost.org/LICENSE_1_0.txt) + */ + + +#ifndef BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_RESIZE_HPP_INCLUDED +#define BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_RESIZE_HPP_INCLUDED + +#include +#include "openmp_state.hpp" + +namespace boost { +namespace numeric { +namespace odeint { + + +template< class InnerState > +struct is_resizeable< openmp_state< InnerState > > +{ + const static bool value = is_resizable< InnerState >::value; +}; + + +template< class InnerState1, class InnerState2 > +struct same_size_impl< openmp_state< InnerState1 > , openmp_state< InnerState2 > > +{ + static bool same_size( const openmp_state< InnerState1 > &x , const openmp_state< InnerState2 > &y ) + { + if( x.size() != y.size() ) return false; + for( size_t i = 0 ; i != x.size() ; i++ ) + if( !same_size(x[i], y[i]) ) return false; + return true; + } +}; + + +template< class InnerStateOut, class InnerStateIn > +struct resize_impl< openmp_state< InnerStateOut > , openmp_state< InnerStateIn > > +{ + static void resize( openmp_state< InnerStateOut > &x , const openmp_state< InnerStateIn > &y ) + { + x.m_parts.resize( y.size() ); +# pragma omp parallel for schedule(static,1) + for(size_t i = 0 ; i != x.size() ; i++) + resize( x[i], y[i] ); + } +}; + + +template< class InnerStateIn , class InnerStateOut > +struct copy_impl< openmp_state< InnerStateIn >, openmp_state< InnerStateOut > > +{ + static void copy( const openmp_state< InnerStateIn > &from, openmp_state< InnerStateOut > &to ) + { +# pragma omp parallel for schedule(static,1) + for(size_t i = 0 ; i != from.size() ; i++) + copy( from[i], to[i] ); + } +}; + + +} // odeint +} // numeric +} // boost + + +#endif // BOOST_NUMERIC_ODEINT_EXTERNAL_THRUST_THRUST_RESIZE_HPP_INCLUDED + diff --git a/boost/numeric/odeint/external/openmp/openmp_state.hpp b/boost/numeric/odeint/external/openmp/openmp_state.hpp new file mode 100644 index 00000000..cc0fbec2 --- /dev/null +++ b/boost/numeric/odeint/external/openmp/openmp_state.hpp @@ -0,0 +1,81 @@ +/* + [auto_generated] + boost/numeric/odeint/external/openmp/openmp_state.hpp + + [begin_description] + Wrappers for OpenMP. + [end_description] + + Copyright 2009-2011 Karsten Ahnert + Copyright 2009-2011 Mario Mulansky + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or + copy at http://www.boost.org/LICENSE_1_0.txt) + */ + + +#ifndef BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_STATE_HPP_INCLUDED +#define BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_STATE_HPP_INCLUDED + +#include +#include +#include +#include +#include +#include + + +namespace boost { +namespace numeric { +namespace odeint { + +/** \brief A container that is split into distinct parts, for threading. + */ +template< class InnerState > +struct openmp_state +{ + std::vector< InnerState > &m_parts; + + template< class Container > + openmp_state( const Container &data ) + : m_parts( omp_get_num_threads() ) + { + const size_t size = last - first; + const size_t part = size / m_parts.size(); +# pragma omp parallel for schedule(static,1) + for(size_t i = 0 ; i != m_parts.size() ; i++) { + const size_t start = i * part; + const size_t end = (std::min)( (i + 1) * part, size ); + const boost::sliced_range sl = boost::adaptors::slice(data, start, end); + resize( m_parts[i], sl ); + copy( sl, m_parts[i] ); + } + } + + InnerState & operator[](size_t i) + { + return m_parts[i]; + } + + const InnerState & operator[](size_t i) const + { + return m_parts[i]; + } + + size_t size() const + { + return m_parts.size(); + } + +}; + + + +} +} +} + + +#endif + diff --git a/boost/numeric/odeint/external/openmp/openmp_system.hpp b/boost/numeric/odeint/external/openmp/openmp_system.hpp new file mode 100644 index 00000000..976cf723 --- /dev/null +++ b/boost/numeric/odeint/external/openmp/openmp_system.hpp @@ -0,0 +1,52 @@ +/* + [auto_generated] + boost/numeric/odeint/external/openmp/openmp_state.hpp + + [begin_description] + Parallelizing wrapper for system functions + [end_description] + + Copyright 2009-2011 Karsten Ahnert + Copyright 2009-2011 Mario Mulansky + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or + copy at http://www.boost.org/LICENSE_1_0.txt) + */ + + +#ifndef BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_SYSTEM_HPP_INCLUDED +#define BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_SYSTEM_HPP_INCLUDED + +#include "openmp_state.hpp" +#include + + +template< class State, class Deriv, class Time > +struct system_function_wrapper_impl +{ + typedef boost::function sys_fun_t; + const sys_fun_t &f; + system_function_wrapper_impl(const sys_fun_t &f) : f(f) {} + + inline void operator(const openmp_state< State > &s, openmp_state< Deriv > &d, const Time &t) const + { +# pragma omp parallel for schedule(static,1) + for(size_t i = 0 ; i < s.size() ; i++) + f(s[i], d[i], t); + } +}; + + +template< class State, class Deriv, class Time > +inline openmp_wrapper_impl< State, Deriv, Time > +openmp_wrapper( boost::function &f ) +{ + return openmp_wrapper_impl(f); +} + + + + + +#endif diff --git a/libs/numeric/odeint/examples/openmp/lorenz_ensemble.cpp b/libs/numeric/odeint/examples/openmp/lorenz_ensemble.cpp index 04017d02..6a98b70d 100644 --- a/libs/numeric/odeint/examples/openmp/lorenz_ensemble.cpp +++ b/libs/numeric/odeint/examples/openmp/lorenz_ensemble.cpp @@ -16,30 +16,25 @@ #include #include #include -#include +#include #include "point_type.hpp" using namespace std; -typedef vector vector_type; +typedef vector inner_state_type; typedef point point_type; -typedef vector state_type; const double sigma = 10.0; const double b = 8.0 / 3.0; +const double R = 10; // FIXME: should vary for each element. struct sys_func { - const vector_type &R; - sys_func( const vector_type &_R ) : R( _R ) { } - - void operator()( const state_type &x , state_type &dxdt , double t ) const { - const size_t n = x.size(); -# pragma omp parallel for - for(size_t i = 0 ; i < n ; i++) { + void operator()( const inner_state_type &x , inner_state_type &dxdt , double t ) const { + for(size_t i = 0 ; i < x.size() ; i++) { const point_type &xi = x[i]; point_type &dxdti = dxdt[i]; dxdti[0] = -sigma * (xi[0] - xi[1]); - dxdti[1] = R[i] * xi[0] - xi[1] - xi[0] * xi[2]; + dxdti[1] = R * xi[0] - xi[1] - xi[0] * xi[2]; dxdti[2] = -b * xi[2] + xi[0] * xi[1]; } } @@ -49,43 +44,23 @@ struct sys_func { int main() { using namespace boost::numeric::odeint; - const size_t n = 1024; - vector_type R(n); - const double Rmin = 0.1, Rmax = 50.0; -# pragma omp parallel for - for(size_t i = 0 ; i < n ; i++) - R[i] = Rmin + (Rmax - Rmin) / (n - 1) * i; + typedef openmp_state state_type; - state_type X(n, point_type(10, 10, 10)); + const size_t n = 1024; + inner_state_type inner(n, point_type(10, 10, 10)); + state_type state(inner); - typedef runge_kutta4< - state_type, double, - state_type, double, - openmp_algebra - > stepper; + typedef runge_kutta4< state_type, double > stepper; const double t_max = 10.0, dt = 0.01; - const int thr = omp_get_max_threads(); - for(size_t i = 0 ; i < 10 ; i++) { - omp_set_num_threads(thr); - const double start0 = omp_get_wtime(); - integrate_const( - stepper(), - sys_func(R), X, - 0.0, t_max, dt); - const double delta0 = omp_get_wtime() - start0; - - omp_set_num_threads(1); - const double start1 = omp_get_wtime(); - integrate_const( - stepper(), - sys_func(R), X, - 0.0, t_max, dt); - const double delta1 = omp_get_wtime() - start1; - - cout << thr << "t=" << delta0 << "s\t1t=" << delta1 << "s\tspeedup=" << (delta1 / delta0) << endl; - } + integrate_const( + stepper(), + openmp_wrapper( sys_func() ), + state, + 0.0, t_max, dt + );\ + return 0; } diff --git a/libs/numeric/odeint/examples/openmp/lorenz_ensemble_simple.cpp b/libs/numeric/odeint/examples/openmp/lorenz_ensemble_simple.cpp new file mode 100644 index 00000000..eaa1dddf --- /dev/null +++ b/libs/numeric/odeint/examples/openmp/lorenz_ensemble_simple.cpp @@ -0,0 +1,83 @@ +/* Boost libs/numeric/odeint/examples/openmp/lorenz_ensemble_simple.cpp + + Copyright 2009-2012 Karsten Ahnert + Copyright 2009-2012 Mario Mulansky + + Parallelized Lorenz ensembles + + Distributed under the Boost Software License, Version 1.0. +(See accompanying file LICENSE_1_0.txt or + copy at http://www.boost.org/LICENSE_1_0.txt) + */ + +#if _OMP + +#include +#include +#include +#include +#include +#include "point_type.hpp" + +using namespace std; + +typedef vector vector_type; +typedef point point_type; +typedef vector state_type; + +const double sigma = 10.0; +const double b = 8.0 / 3.0; + +struct sys_func { + const vector_type &R; + sys_func( const vector_type &_R ) : R( _R ) { } + + void operator()( const state_type &x , state_type &dxdt , double t ) const { + const size_t n = x.size(); +# pragma omp parallel for + for(size_t i = 0 ; i < n ; i++) { + const point_type &xi = x[i]; + point_type &dxdti = dxdt[i]; + dxdti[0] = -sigma * (xi[0] - xi[1]); + dxdti[1] = R[i] * xi[0] - xi[1] - xi[0] * xi[2]; + dxdti[2] = -b * xi[2] + xi[0] * xi[1]; + } + } +}; + + +int main() { + using namespace boost::numeric::odeint; + + const size_t n = 1024; + vector_type R(n); + const double Rmin = 0.1, Rmax = 50.0; +# pragma omp parallel for + for(size_t i = 0 ; i < n ; i++) + R[i] = Rmin + (Rmax - Rmin) / (n - 1) * i; + + state_type X(n, point_type(10, 10, 10)); + + typedef runge_kutta4< + state_type, double, + state_type, double, + openmp_algebra + > stepper; + + const double t_max = 10.0, dt = 0.01; + + integrate_const( + stepper(), + sys_func(R), X, + 0.0, t_max, dt + ); + + return 0; +} + +#else + +int main() { return -1; } + +#endif +