From 8fd26a341b54f1a6728e6d5afdc5221118bae615 Mon Sep 17 00:00:00 2001
From: Varun Agrawal
Date: Tue, 15 Oct 2024 15:14:33 -0400
Subject: [PATCH 1/6] add additional direction methods beyond Polak-Ribiere

---
 .../NonlinearConjugateGradientOptimizer.cpp | 62 +++++++++++++++++--
 .../NonlinearConjugateGradientOptimizer.h   | 15 ++++-
 2 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
index 9fce1776b8..14d2a705b4 100644
--- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
+++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
@@ -28,6 +28,46 @@ namespace gtsam {
 
 typedef internal::NonlinearOptimizerState State;
 
+/// Fletcher-Reeves formula for computing β, the conjugate-direction update
+/// parameter.
+static double FletcherReeves(const VectorValues& currentGradient,
+                             const VectorValues& prevGradient) {
+  // Fletcher-Reeves: beta = g_n'*g_n / (g_{n-1}'*g_{n-1})
+  const double beta = std::max(0.0, currentGradient.dot(currentGradient) /
+                                        prevGradient.dot(prevGradient));
+  return beta;
+}
+
+/// Polak-Ribiere formula for computing β, the conjugate-direction update
+/// parameter.
+static double PolakRibiere(const VectorValues& currentGradient,
+                           const VectorValues& prevGradient) {
+  // Polak-Ribiere: beta = g_n'*(g_n - g_{n-1}) / (g_{n-1}'*g_{n-1})
+  const double beta =
+      std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
+                        prevGradient.dot(prevGradient));
+  return beta;
+}
+
+/// Hestenes-Stiefel formula for computing β, the conjugate-direction update
+/// parameter.
+static double HestenesStiefel(const VectorValues& currentGradient,
+                              const VectorValues& prevGradient,
+                              const VectorValues& direction) {
+  // Hestenes-Stiefel: beta = g_n'*(g_n - g_{n-1}) / (-s_{n-1}'*(g_n - g_{n-1}))
+  VectorValues d = currentGradient - prevGradient;
+  const double beta = std::max(0.0, currentGradient.dot(d) / -direction.dot(d));
+  return beta;
+}
+
+/// Dai-Yuan formula for computing β, the conjugate-direction update
+/// parameter.
+static double DaiYuan(const VectorValues& currentGradient,
+                      const VectorValues& prevGradient,
+                      const VectorValues& direction) {
+  // Dai-Yuan: beta = g_n'*g_n / (-s_{n-1}'*(g_n - g_{n-1}))
+  const double beta =
+      std::max(0.0, currentGradient.dot(currentGradient) /
+                        -direction.dot(currentGradient - prevGradient));
+  return beta;
+}
+
 /**
  * @brief Return the gradient vector of a nonlinear factor graph
  * @param nfg the graph
@@ -43,7 +83,7 @@ static VectorValues gradientInPlace(const NonlinearFactorGraph& nfg,
 
 NonlinearConjugateGradientOptimizer::NonlinearConjugateGradientOptimizer(
     const NonlinearFactorGraph& graph, const Values& initialValues,
-    const Parameters& params)
+    const Parameters& params, const DirectionMethod& directionMethod)
     : Base(graph, std::unique_ptr<State>(
                       new State(initialValues, graph.error(initialValues)))),
-      params_(params) {}
+      params_(params), directionMethod_(directionMethod) {}
 
@@ -169,10 +209,22 @@ NonlinearConjugateGradientOptimizer::nonlinearConjugateGradient(
   } else {
     prevGradient = currentGradient;
     currentGradient = gradient(currentValues);
-    // Polak-Ribiere: beta = g'*(g_n-g_n-1)/g_n-1'*g_n-1
-    const double beta =
-        std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
-                          prevGradient.dot(prevGradient));
+    double beta;
+
+    switch (directionMethod_) {
+      case DirectionMethod::FletcherReeves:
+        beta = FletcherReeves(currentGradient, prevGradient);
+        break;
+      case DirectionMethod::PolakRibiere:
+        beta = PolakRibiere(currentGradient, prevGradient);
+        break;
+      case DirectionMethod::HestenesStiefel:
+        beta = HestenesStiefel(currentGradient, prevGradient, direction);
+        break;
+      case DirectionMethod::DaiYuan:
+        beta = DaiYuan(currentGradient, prevGradient, direction);
+        break;
+    }
     direction = currentGradient + (beta * direction);
   }
 
diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
index cdc0634d6f..f9cd22361f 100644
--- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
+++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
@@ -31,16 +31,25 @@ class GTSAM_EXPORT NonlinearConjugateGradientOptimizer
   typedef NonlinearOptimizerParams Parameters;
   typedef std::shared_ptr<NonlinearConjugateGradientOptimizer> shared_ptr;
 
+  enum class DirectionMethod {
+    FletcherReeves,
+    PolakRibiere,
+    HestenesStiefel,
+    DaiYuan
+  };
+
 protected:
   Parameters params_;
+  DirectionMethod directionMethod_;
 
   const NonlinearOptimizerParams &_params() const override { return params_; }
 
 public:
   /// Constructor
-  NonlinearConjugateGradientOptimizer(const NonlinearFactorGraph &graph,
-                                      const Values &initialValues,
-                                      const Parameters &params = Parameters());
+  NonlinearConjugateGradientOptimizer(
+      const NonlinearFactorGraph &graph, const Values &initialValues,
+      const Parameters &params = Parameters(),
+      const DirectionMethod &directionMethod = DirectionMethod::PolakRibiere);
 
   /// Destructor
   ~NonlinearConjugateGradientOptimizer() override {}
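For readers comparing the four update rules introduced above, here is a minimal standalone sketch: plain C++17 with a tiny local `Vec` type standing in for `gtsam::VectorValues` (`Vec`, `dot`, and `sub` are helpers invented here, and the numbers are made up).

```cpp
// Standalone comparison of the four beta rules from this patch.
// Vec, dot, and sub are local stand-ins for gtsam::VectorValues operations.
#include <algorithm>
#include <array>
#include <iostream>

using Vec = std::array<double, 2>;

double dot(const Vec& a, const Vec& b) { return a[0] * b[0] + a[1] * b[1]; }
Vec sub(const Vec& a, const Vec& b) { return {a[0] - b[0], a[1] - b[1]}; }

int main() {
  const Vec g = {1.0, -2.0};      // current gradient g_n
  const Vec gPrev = {2.0, -1.0};  // previous gradient g_{n-1}
  const Vec s = {-2.0, 1.0};      // previous search direction s_{n-1}
  const Vec y = sub(g, gPrev);    // gradient change g_n - g_{n-1}

  // Same formulas as the patch, including the clamp to 0.
  const double fr = std::max(0.0, dot(g, g) / dot(gPrev, gPrev));
  const double pr = std::max(0.0, dot(g, y) / dot(gPrev, gPrev));
  const double hs = std::max(0.0, dot(g, y) / -dot(s, y));
  const double dy = std::max(0.0, dot(g, g) / -dot(s, y));

  std::cout << "FR=" << fr << " PR=" << pr
            << " HS=" << hs << " DY=" << dy << "\n";
  return 0;
}
```

Clamping β to zero makes the next direction equal the raw gradient, i.e. a steepest-descent restart whenever the formula would otherwise go negative.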
From bc27d9fc96b0ba4e663331a17f1445b87ca0159b Mon Sep 17 00:00:00 2001
From: Varun Agrawal
Date: Tue, 15 Oct 2024 17:07:56 -0400
Subject: [PATCH 2/6] set default direction method

---
 gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
index f9cd22361f..63ec4f6ea9 100644
--- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
+++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
@@ -40,7 +40,7 @@ class GTSAM_EXPORT NonlinearConjugateGradientOptimizer
 
 protected:
   Parameters params_;
-  DirectionMethod directionMethod_;
+  DirectionMethod directionMethod_ = DirectionMethod::PolakRibiere;
 
   const NonlinearOptimizerParams &_params() const override { return params_; }
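With the default in place, callers name a `DirectionMethod` only to override Polak-Ribiere. A usage sketch under the API as it stands after patches 1-2; the one-factor toy problem is illustrative only, and note that at this point in the series the enum is still nested inside the optimizer class.

```cpp
// Sketch: choosing a direction method at construction. The toy graph
// (a single prior pulling x1 toward the origin) is invented for illustration.
#include <gtsam/geometry/Point2.h>
#include <gtsam/inference/Symbol.h>
#include <gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h>
#include <gtsam/nonlinear/NonlinearFactorGraph.h>

using namespace gtsam;

int main() {
  NonlinearFactorGraph graph;
  graph.addPrior(Symbol('x', 1), Point2(0.0, 0.0),
                 noiseModel::Isotropic::Sigma(2, 1.0));

  Values initialEstimate;
  initialEstimate.insert(Symbol('x', 1), Point2(3.0, 4.0));

  NonlinearOptimizerParams params;
  params.maxIterations = 100;

  // Omitting the last argument keeps the Polak-Ribiere default; here we
  // opt into Fletcher-Reeves explicitly.
  NonlinearConjugateGradientOptimizer optimizer(
      graph, initialEstimate, params,
      NonlinearConjugateGradientOptimizer::DirectionMethod::FletcherReeves);

  Values result = optimizer.optimize();
  result.print("optimized: ");
  return 0;
}
```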
From 2d3a296d068159a8968a096364faf40bc100512c Mon Sep 17 00:00:00 2001
From: Varun Agrawal
Date: Thu, 17 Oct 2024 11:09:52 -0400
Subject: [PATCH 3/6] nonlinearConjugateGradient accepts direction method

---
 .../NonlinearConjugateGradientOptimizer.cpp | 35 ++++++------
 .../NonlinearConjugateGradientOptimizer.h   | 57 +++++++++++++++----
 2 files changed, 63 insertions(+), 29 deletions(-)

diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
index fd0e60bc8b..438eac7a82 100644
--- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
+++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
@@ -28,18 +28,18 @@ namespace gtsam {
 
 typedef internal::NonlinearOptimizerState State;
 
-/// Fletcher-Reeves formula for computing β, the conjugate-direction update
-/// parameter.
-static double FletcherReeves(const VectorValues& currentGradient,
-                             const VectorValues& prevGradient) {
+/* ************************************************************************* */
+double FletcherReeves(const VectorValues& currentGradient,
+                      const VectorValues& prevGradient) {
   // Fletcher-Reeves: beta = g_n'*g_n / (g_{n-1}'*g_{n-1})
   const double beta = std::max(0.0, currentGradient.dot(currentGradient) /
                                         prevGradient.dot(prevGradient));
   return beta;
 }
 
-/// Polak-Ribiere formula for computing β, the conjugate-direction update
-/// parameter.
-static double PolakRibiere(const VectorValues& currentGradient,
-                           const VectorValues& prevGradient) {
+/* ************************************************************************* */
+double PolakRibiere(const VectorValues& currentGradient,
+                    const VectorValues& prevGradient) {
   // Polak-Ribiere: beta = g_n'*(g_n - g_{n-1}) / (g_{n-1}'*g_{n-1})
   const double beta =
       std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
                         prevGradient.dot(prevGradient));
   return beta;
 }
 
-/// Hestenes-Stiefel formula for computing β, the conjugate-direction update
-/// parameter.
-static double HestenesStiefel(const VectorValues& currentGradient,
-                              const VectorValues& prevGradient,
-                              const VectorValues& direction) {
+/* ************************************************************************* */
+double HestenesStiefel(const VectorValues& currentGradient,
+                       const VectorValues& prevGradient,
+                       const VectorValues& direction) {
   // Hestenes-Stiefel: beta = g_n'*(g_n - g_{n-1}) / (-s_{n-1}'*(g_n - g_{n-1}))
   VectorValues d = currentGradient - prevGradient;
   const double beta = std::max(0.0, currentGradient.dot(d) / -direction.dot(d));
   return beta;
 }
 
-/// Dai-Yuan formula for computing β, the conjugate-direction update
-/// parameter.
-static double DaiYuan(const VectorValues& currentGradient,
-                      const VectorValues& prevGradient,
-                      const VectorValues& direction) {
+/* ************************************************************************* */
+double DaiYuan(const VectorValues& currentGradient,
+               const VectorValues& prevGradient,
+               const VectorValues& direction) {
   // Dai-Yuan: beta = g_n'*g_n / (-s_{n-1}'*(g_n - g_{n-1}))
   const double beta =
       std::max(0.0, currentGradient.dot(currentGradient) /
                         -direction.dot(currentGradient - prevGradient));
   return beta;
 }
 
@@ -110,7 +110,8 @@ NonlinearConjugateGradientOptimizer::System::advance(const State& current,
 
 GaussianFactorGraph::shared_ptr NonlinearConjugateGradientOptimizer::iterate() {
   const auto [newValues, dummy] = nonlinearConjugateGradient(
-      System(graph_), state_->values, params_, true /* single iteration */);
+      System(graph_), state_->values, params_, true /* single iteration */,
+      directionMethod_);
   state_.reset(
       new State(newValues, graph_.error(newValues), state_->iterations + 1));
 
@@ -121,8 +122,8 @@ GaussianFactorGraph::shared_ptr NonlinearConjugateGradientOptimizer::iterate() {
 const Values& NonlinearConjugateGradientOptimizer::optimize() {
   // Optimize until convergence
   System system(graph_);
-  const auto [newValues, iterations] =
-      nonlinearConjugateGradient(system, state_->values, params_, false);
+  const auto [newValues, iterations] = nonlinearConjugateGradient(
+      system, state_->values, params_, false, directionMethod_);
   state_.reset(
       new State(std::move(newValues), graph_.error(newValues), iterations));
   return state_->values;
diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
index 78f11a6001..09b932d426 100644
--- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
+++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
@@ -23,6 +23,31 @@
 
 namespace gtsam {
 
+/// Fletcher-Reeves formula for computing β, the conjugate-direction update
+/// parameter.
+double FletcherReeves(const VectorValues &currentGradient,
+                      const VectorValues &prevGradient);
+
+/// Polak-Ribiere formula for computing β, the conjugate-direction update
+/// parameter.
+double PolakRibiere(const VectorValues &currentGradient,
+                    const VectorValues &prevGradient);
+
+/// Hestenes-Stiefel formula for computing β, the conjugate-direction update
+/// parameter.
+double HestenesStiefel(const VectorValues &currentGradient,
+                       const VectorValues &prevGradient,
+                       const VectorValues &direction);
+
+/// Dai-Yuan formula for computing β, the conjugate-direction update
+/// parameter.
+double DaiYuan(const VectorValues &currentGradient,
+               const VectorValues &prevGradient, const VectorValues &direction);
+
+enum class DirectionMethod {
+  FletcherReeves,
+  PolakRibiere,
+  HestenesStiefel,
+  DaiYuan
+};
+
 /** An implementation of the nonlinear CG method using the template below */
 class GTSAM_EXPORT NonlinearConjugateGradientOptimizer
     : public NonlinearOptimizer {
@@ -49,13 +74,6 @@ class GTSAM_EXPORT NonlinearConjugateGradientOptimizer
   typedef NonlinearOptimizerParams Parameters;
   typedef std::shared_ptr<NonlinearConjugateGradientOptimizer> shared_ptr;
 
-  enum class DirectionMethod {
-    FletcherReeves,
-    PolakRibiere,
-    HestenesStiefel,
-    DaiYuan
-  };
-
 protected:
   Parameters params_;
   DirectionMethod directionMethod_ = DirectionMethod::PolakRibiere;
@@ -149,7 +167,9 @@ double lineSearch(const S &system, const V currentValues, const W &gradient) {
 template <class S, class V>
 std::tuple<V, int> nonlinearConjugateGradient(
     const S &system, const V &initial, const NonlinearOptimizerParams &params,
-    const bool singleIteration, const bool gradientDescent = false) {
+    const bool singleIteration,
+    const DirectionMethod &directionMethod = DirectionMethod::PolakRibiere,
+    const bool gradientDescent = false) {
   // GTSAM_CONCEPT_MANIFOLD_TYPE(V)
 
   size_t iteration = 0;
@@ -186,10 +206,23 @@ std::tuple<V, int> nonlinearConjugateGradient(
     } else {
       prevGradient = currentGradient;
       currentGradient = system.gradient(currentValues);
-      // Polak-Ribiere: beta = g'*(g_n-g_n-1)/g_n-1'*g_n-1
-      const double beta =
-          std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
-                            prevGradient.dot(prevGradient));
+
+      double beta;
+      switch (directionMethod) {
+        case DirectionMethod::FletcherReeves:
+          beta = FletcherReeves(currentGradient, prevGradient);
+          break;
+        case DirectionMethod::PolakRibiere:
+          beta = PolakRibiere(currentGradient, prevGradient);
+          break;
+        case DirectionMethod::HestenesStiefel:
+          beta = HestenesStiefel(currentGradient, prevGradient, direction);
+          break;
+        case DirectionMethod::DaiYuan:
+          beta = DaiYuan(currentGradient, prevGradient, direction);
+          break;
+      }
+
       direction = currentGradient + (beta * direction);
     }
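Since the β formulas are now free functions in namespace `gtsam`, they can be exercised in isolation, without building an optimizer. A minimal sketch, assuming the usual `VectorValues` insert/dot API and made-up gradient values:

```cpp
// Sketch: calling the free beta functions directly after this patch.
#include <gtsam/linear/VectorValues.h>
#include <gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h>
#include <iostream>

using namespace gtsam;

int main() {
  // Two fake gradients over a single variable (key 0).
  VectorValues g, gPrev;
  g.insert(0, Vector2(1.0, -2.0));
  gPrev.insert(0, Vector2(2.0, -1.0));

  std::cout << "FR beta = " << FletcherReeves(g, gPrev) << "\n";
  std::cout << "PR beta = " << PolakRibiere(g, gPrev) << "\n";
  return 0;
}
```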
From 2d4ee5057a9651bd3e9a3e392570ccb0d5d807ab Mon Sep 17 00:00:00 2001
From: Varun Agrawal
Date: Fri, 18 Oct 2024 10:30:01 -0400
Subject: [PATCH 4/6] template the directional methods

---
 .../NonlinearConjugateGradientOptimizer.cpp | 40 -----------------
 .../NonlinearConjugateGradientOptimizer.h   | 44 +++++++++++++++----
 2 files changed, 35 insertions(+), 49 deletions(-)

diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
index 438eac7a82..8b8542779a 100644
--- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
+++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.cpp
@@ -28,46 +28,6 @@ namespace gtsam {
 
 typedef internal::NonlinearOptimizerState State;
 
-/* ************************************************************************* */
-double FletcherReeves(const VectorValues& currentGradient,
-                      const VectorValues& prevGradient) {
-  // Fletcher-Reeves: beta = g_n'*g_n / (g_{n-1}'*g_{n-1})
-  const double beta = std::max(0.0, currentGradient.dot(currentGradient) /
-                                        prevGradient.dot(prevGradient));
-  return beta;
-}
-
-/* ************************************************************************* */
-double PolakRibiere(const VectorValues& currentGradient,
-                    const VectorValues& prevGradient) {
-  // Polak-Ribiere: beta = g_n'*(g_n - g_{n-1}) / (g_{n-1}'*g_{n-1})
-  const double beta =
-      std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
-                        prevGradient.dot(prevGradient));
-  return beta;
-}
-
-/* ************************************************************************* */
-double HestenesStiefel(const VectorValues& currentGradient,
-                       const VectorValues& prevGradient,
-                       const VectorValues& direction) {
-  // Hestenes-Stiefel: beta = g_n'*(g_n - g_{n-1}) / (-s_{n-1}'*(g_n - g_{n-1}))
-  VectorValues d = currentGradient - prevGradient;
-  const double beta = std::max(0.0, currentGradient.dot(d) / -direction.dot(d));
-  return beta;
-}
-
-/* ************************************************************************* */
-double DaiYuan(const VectorValues& currentGradient,
-               const VectorValues& prevGradient,
-               const VectorValues& direction) {
-  // Dai-Yuan: beta = g_n'*g_n / (-s_{n-1}'*(g_n - g_{n-1}))
-  const double beta =
-      std::max(0.0, currentGradient.dot(currentGradient) /
-                        -direction.dot(currentGradient - prevGradient));
-  return beta;
-}
-
 /**
  * @brief Return the gradient vector of a nonlinear factor graph
  * @param nfg the graph
diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
index 09b932d426..1aee33a721 100644
--- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
+++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h
@@ -24,22 +24,48 @@
 namespace gtsam {
 
 /// Fletcher-Reeves formula for computing β, the conjugate-direction update
 /// parameter.
-double FletcherReeves(const VectorValues &currentGradient,
-                      const VectorValues &prevGradient);
+template <typename Gradient>
+double FletcherReeves(const Gradient &currentGradient,
+                      const Gradient &prevGradient) {
+  // Fletcher-Reeves: beta = g_n'*g_n / (g_{n-1}'*g_{n-1})
+  const double beta =
+      currentGradient.dot(currentGradient) / prevGradient.dot(prevGradient);
+  return beta;
+}
 
 /// Polak-Ribiere formula for computing β, the conjugate-direction update
 /// parameter.
-double PolakRibiere(const VectorValues &currentGradient,
-                    const VectorValues &prevGradient);
+template <typename Gradient>
+double PolakRibiere(const Gradient &currentGradient,
+                    const Gradient &prevGradient) {
+  // Polak-Ribiere: beta = g_n'*(g_n - g_{n-1}) / (g_{n-1}'*g_{n-1})
+  const double beta =
+      std::max(0.0, currentGradient.dot(currentGradient - prevGradient) /
+                        prevGradient.dot(prevGradient));
+  return beta;
+}
 
 /// Hestenes-Stiefel formula for computing β, the conjugate-direction update
 /// parameter.
-double HestenesStiefel(const VectorValues &currentGradient,
-                       const VectorValues &prevGradient,
-                       const VectorValues &direction);
+template <typename Gradient>
+double HestenesStiefel(const Gradient &currentGradient,
+                       const Gradient &prevGradient,
+                       const Gradient &direction) {
+  // Hestenes-Stiefel: beta = g_n'*(g_n - g_{n-1}) / (-s_{n-1}'*(g_n - g_{n-1}))
+  VectorValues d = currentGradient - prevGradient;
+  const double beta = std::max(0.0, currentGradient.dot(d) / -direction.dot(d));
+  return beta;
+}
 
 /// Dai-Yuan formula for computing β, the conjugate-direction update
 /// parameter.
-double DaiYuan(const VectorValues &currentGradient,
-               const VectorValues &prevGradient, const VectorValues &direction);
+template <typename Gradient>
+double DaiYuan(const Gradient &currentGradient, const Gradient &prevGradient,
+               const VectorValues &direction) {
+  // Dai-Yuan: beta = g_n'*g_n / (-s_{n-1}'*(g_n - g_{n-1}))
+  const double beta =
+      std::max(0.0, currentGradient.dot(currentGradient) /
+                        -direction.dot(currentGradient - prevGradient));
+  return beta;
+}
 
 enum class DirectionMethod {
   FletcherReeves,
   PolakRibiere,
   HestenesStiefel,
   DaiYuan
 };
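The `Gradient` template parameter only requires `dot()` and `operator-`, so after this patch Fletcher-Reeves and Polak-Ribiere instantiate for raw Eigen vectors as well as `VectorValues`. A sketch with made-up values (note the caveat in the comments):

```cpp
// Sketch: the templated formulas accept any type with dot() and operator-.
#include <Eigen/Dense>
#include <gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h>
#include <iostream>

int main() {
  Eigen::VectorXd g(2), gPrev(2);
  g << 1.0, -2.0;
  gPrev << 2.0, -1.0;

  std::cout << "FR = " << gtsam::FletcherReeves(g, gPrev) << "\n";
  std::cout << "PR = " << gtsam::PolakRibiere(g, gPrev) << "\n";

  // Caveat: HestenesStiefel and DaiYuan still reference VectorValues in
  // their bodies at this point in the series, so instantiating them with
  // Eigen types would not compile until the final patch swaps those two
  // leftovers for the Gradient parameter.
  return 0;
}
```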
From 07b11bc9f1d4050666d22f453a777a7f60b9d00c Mon Sep 17 00:00:00 2001
From: Varun Agrawal
Date: Sat, 19 Oct 2024 17:01:38 -0400
Subject: [PATCH 5/6] add test for different nonlinear CG direction methods

---
 ...estNonlinearConjugateGradientOptimizer.cpp | 43 +++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp b/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp
index 36673c7a08..f216e4a8c4 100644
--- a/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp
+++ b/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp
@@ -79,6 +79,49 @@ TEST(NonlinearConjugateGradientOptimizer, Optimize) {
   EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
 }
 
+/* ************************************************************************* */
+/// Test different direction methods
+TEST(NonlinearConjugateGradientOptimizer, DirectionMethods) {
+  const auto [graph, initialEstimate] = generateProblem();
+
+  NonlinearOptimizerParams param;
+  param.maxIterations =
+      500; /* requires a larger number of iterations to converge */
+  param.verbosity = NonlinearOptimizerParams::SILENT;
+
+  // Fletcher-Reeves
+  {
+    NonlinearConjugateGradientOptimizer optimizer(
+        graph, initialEstimate, param, DirectionMethod::FletcherReeves);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+  // Polak-Ribiere
+  {
+    NonlinearConjugateGradientOptimizer optimizer(
+        graph, initialEstimate, param, DirectionMethod::PolakRibiere);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+  // Hestenes-Stiefel
+  {
+    NonlinearConjugateGradientOptimizer optimizer(
+        graph, initialEstimate, param, DirectionMethod::HestenesStiefel);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+  // Dai-Yuan
+  {
+    NonlinearConjugateGradientOptimizer optimizer(graph, initialEstimate, param,
+                                                  DirectionMethod::DaiYuan);
+    Values result = optimizer.optimize();
+
+    EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4);
+  }
+}
+
 /* ************************************************************************* */
 int main() {
   TestResult tr;
-double DaiYuan(const VectorValues ¤tGradient, - const VectorValues &prevGradient, const VectorValues &direction); +template +double DaiYuan(const Gradient ¤tGradient, const Gradient &prevGradient, + const VectorValues &direction) { + // Dai-Yuan: beta = g_n'*g_n/(-s_n-1')*(g_n-g_n-1) + const double beta = + std::max(0.0, currentGradient.dot(currentGradient) / + -direction.dot(currentGradient - prevGradient)); + return beta; +} enum class DirectionMethod { FletcherReeves, From 07b11bc9f1d4050666d22f453a777a7f60b9d00c Mon Sep 17 00:00:00 2001 From: Varun Agrawal Date: Sat, 19 Oct 2024 17:01:38 -0400 Subject: [PATCH 5/6] add test for different nonlinear CG direction methods --- ...estNonlinearConjugateGradientOptimizer.cpp | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp b/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp index 36673c7a08..f216e4a8c4 100644 --- a/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp +++ b/gtsam/nonlinear/tests/testNonlinearConjugateGradientOptimizer.cpp @@ -79,6 +79,49 @@ TEST(NonlinearConjugateGradientOptimizer, Optimize) { EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4); } +/* ************************************************************************* */ +/// Test different direction methods +TEST(NonlinearConjugateGradientOptimizer, DirectionMethods) { + const auto [graph, initialEstimate] = generateProblem(); + + NonlinearOptimizerParams param; + param.maxIterations = + 500; /* requires a larger number of iterations to converge */ + param.verbosity = NonlinearOptimizerParams::SILENT; + + // Fletcher-Reeves + { + NonlinearConjugateGradientOptimizer optimizer( + graph, initialEstimate, param, DirectionMethod::FletcherReeves); + Values result = optimizer.optimize(); + + EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4); + } + // Polak-Ribiere + { + NonlinearConjugateGradientOptimizer optimizer( + graph, initialEstimate, param, DirectionMethod::PolakRibiere); + Values result = optimizer.optimize(); + + EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4); + } + // Hestenes-Stiefel + { + NonlinearConjugateGradientOptimizer optimizer( + graph, initialEstimate, param, DirectionMethod::HestenesStiefel); + Values result = optimizer.optimize(); + + EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4); + } + // Dai-Yuan + { + NonlinearConjugateGradientOptimizer optimizer(graph, initialEstimate, param, + DirectionMethod::DaiYuan); + Values result = optimizer.optimize(); + + EXPECT_DOUBLES_EQUAL(0.0, graph.error(result), 1e-4); + } +} /* ************************************************************************* */ int main() { TestResult tr; From 7bb98946a27dfdfd933b0d384a292944173141a1 Mon Sep 17 00:00:00 2001 From: Varun Agrawal Date: Sat, 19 Oct 2024 17:03:15 -0400 Subject: [PATCH 6/6] missed two templates --- gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h index 1aee33a721..bd106afbed 100644 --- a/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h +++ b/gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h @@ -51,7 +51,7 @@ double HestenesStiefel(const Gradient ¤tGradient, const Gradient &prevGradient, const Gradient &direction) { // Hestenes-Stiefel: beta = g_n'*(g_n-g_n-1)/(-s_n-1')*(g_n-g_n-1) - VectorValues d = currentGradient - prevGradient; + Gradient d 
   const double beta = std::max(0.0, currentGradient.dot(d) / -direction.dot(d));
   return beta;
 }
@@ -59,7 +59,7 @@
 /// Dai-Yuan formula for computing β, the conjugate-direction update
 /// parameter.
 template <typename Gradient>
 double DaiYuan(const Gradient &currentGradient, const Gradient &prevGradient,
-               const VectorValues &direction) {
+               const Gradient &direction) {
   // Dai-Yuan: beta = g_n'*g_n / (-s_{n-1}'*(g_n - g_{n-1}))
   const double beta =
       std::max(0.0, currentGradient.dot(currentGradient) /
                         -direction.dot(currentGradient - prevGradient));
   return beta;
 }
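With the last two `VectorValues` references replaced by the `Gradient` parameter, all four rules now instantiate for any dot-product-capable type. A closing sketch with plain Eigen vectors (values made up):

```cpp
// Sketch: after this fix, Hestenes-Stiefel and Dai-Yuan also compile for
// raw Eigen types, matching Fletcher-Reeves and Polak-Ribiere.
#include <Eigen/Dense>
#include <gtsam/nonlinear/NonlinearConjugateGradientOptimizer.h>
#include <iostream>

int main() {
  Eigen::VectorXd g(2), gPrev(2), s(2);
  g << 1.0, -2.0;
  gPrev << 2.0, -1.0;
  s << -2.0, 1.0;  // previous search direction

  std::cout << "HS = " << gtsam::HestenesStiefel(g, gPrev, s) << "\n";
  std::cout << "DY = " << gtsam::DaiYuan(g, gPrev, s) << "\n";
  return 0;
}
```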