Merge pull request #1885 from borglab/feature/tripletFactor

TransferFactor with numerical derivatives
borglab · Oct 27, 2024 · d48b1fc · d48b1fc
2 parents 210e582 + cbbe2d2
commit d48b1fc
Show file tree

Hide file tree

Showing 13 changed files with 794 additions and 117 deletions.
diff --git a/examples/SFMExample.cpp b/examples/SFMExample.cpp
@@ -39,7 +39,7 @@
 // Finally, once all of the factors have been added to our factor graph, we will want to
 // solve/optimize to graph to find the best (Maximum A Posteriori) set of variable values.
 // GTSAM includes several nonlinear optimizers to perform this step. Here we will use a
-// trust-region method known as Powell's Degleg
+// trust-region method known as Powell's Dogleg
 #include <gtsam/nonlinear/DoglegOptimizer.h>
 
 // The nonlinear solvers within GTSAM are iterative solvers, meaning they linearize the
@@ -57,7 +57,7 @@ using namespace gtsam;
 /* ************************************************************************* */
 int main(int argc, char* argv[]) {
   // Define the camera calibration parameters
-  Cal3_S2::shared_ptr K(new Cal3_S2(50.0, 50.0, 0.0, 50.0, 50.0));
+  auto K = std::make_shared<Cal3_S2>(50.0, 50.0, 0.0, 50.0, 50.0);
 
   // Define the camera observation noise model
   auto measurementNoise =

diff --git a/examples/SFMdata.h b/examples/SFMdata.h
@@ -16,56 +16,89 @@
  */
 
 /**
- * A structure-from-motion example with landmarks, default function arguments give
+ * A structure-from-motion example with landmarks, default arguments give:
  *  - The landmarks form a 10 meter cube
  *  - The robot rotates around the landmarks, always facing towards the cube
- * Passing function argument allows to specificy an initial position, a pose increment and step count.
+ * Passing function argument allows to specify an initial position, a pose
+ * increment and step count.
  */
 
 #pragma once
 
-// As this is a full 3D problem, we will use Pose3 variables to represent the camera
-// positions and Point3 variables (x, y, z) to represent the landmark coordinates.
-// Camera observations of landmarks (i.e. pixel coordinates) will be stored as Point2 (x, y).
-// We will also need a camera object to hold calibration information and perform projections.
-#include <gtsam/geometry/Pose3.h>
+// As this is a full 3D problem, we will use Pose3 variables to represent the
+// camera positions and Point3 variables (x, y, z) to represent the landmark
+// coordinates. Camera observations of landmarks (i.e. pixel coordinates) will
+// be stored as Point2 (x, y).
 #include <gtsam/geometry/Point3.h>
+#include <gtsam/geometry/Pose3.h>
 
-// We will also need a camera object to hold calibration information and perform projections.
-#include <gtsam/geometry/PinholeCamera.h>
+// We will also need a camera object to hold calibration information and perform
+// projections.
 #include <gtsam/geometry/Cal3_S2.h>
+#include <gtsam/geometry/PinholeCamera.h>
 
-/* ************************************************************************* */
-std::vector<gtsam::Point3> createPoints() {
+namespace gtsam {
 
-  // Create the set of ground-truth landmarks
-  std::vector<gtsam::Point3> points;
-  points.push_back(gtsam::Point3(10.0,10.0,10.0));
-  points.push_back(gtsam::Point3(-10.0,10.0,10.0));
-  points.push_back(gtsam::Point3(-10.0,-10.0,10.0));
-  points.push_back(gtsam::Point3(10.0,-10.0,10.0));
-  points.push_back(gtsam::Point3(10.0,10.0,-10.0));
-  points.push_back(gtsam::Point3(-10.0,10.0,-10.0));
-  points.push_back(gtsam::Point3(-10.0,-10.0,-10.0));
-  points.push_back(gtsam::Point3(10.0,-10.0,-10.0));
+/// Create a set of ground-truth landmarks
+std::vector<Point3> createPoints() {
+  std::vector<Point3> points;
+  points.push_back(Point3(10.0, 10.0, 10.0));
+  points.push_back(Point3(-10.0, 10.0, 10.0));
+  points.push_back(Point3(-10.0, -10.0, 10.0));
+  points.push_back(Point3(10.0, -10.0, 10.0));
+  points.push_back(Point3(10.0, 10.0, -10.0));
+  points.push_back(Point3(-10.0, 10.0, -10.0));
+  points.push_back(Point3(-10.0, -10.0, -10.0));
+  points.push_back(Point3(10.0, -10.0, -10.0));
 
   return points;
 }
 
-/* ************************************************************************* */
-std::vector<gtsam::Pose3> createPoses(
-            const gtsam::Pose3& init = gtsam::Pose3(gtsam::Rot3::Ypr(M_PI/2,0,-M_PI/2), gtsam::Point3(30, 0, 0)),
-            const gtsam::Pose3& delta = gtsam::Pose3(gtsam::Rot3::Ypr(0,-M_PI/4,0), gtsam::Point3(sin(M_PI/4)*30, 0, 30*(1-sin(M_PI/4)))),
-            int steps = 8) {
+/**
+ * Create a set of ground-truth poses
+ *  Default values give a circular trajectory, radius 30 at pi/4 intervals,
+ * always facing the circle center
+ */
+std::vector<Pose3> createPoses(
+    const Pose3& init = Pose3(Rot3::Ypr(M_PI_2, 0, -M_PI_2), {30, 0, 0}),
+    const Pose3& delta = Pose3(Rot3::Ypr(0, -M_PI_4, 0),
+                               {sin(M_PI_4) * 30, 0, 30 * (1 - sin(M_PI_4))}),
+    int steps = 8) {
+  std::vector<Pose3> poses;
+  poses.reserve(steps);
 
-  // Create the set of ground-truth poses
-  // Default values give a circular trajectory, radius 30 at pi/4 intervals, always facing the circle center
-  std::vector<gtsam::Pose3> poses;
-  int i = 1;
   poses.push_back(init);
-  for(; i < steps; ++i) {
-    poses.push_back(poses[i-1].compose(delta));
+  for (int i = 1; i < steps; ++i) {
+    poses.push_back(poses[i - 1].compose(delta));
   }
 
   return poses;
 }
+
+/**
+ * Create regularly spaced poses with specified radius and number of cameras
+ */
+std::vector<Pose3> posesOnCircle(int num_cameras = 8, double R = 30) {
+  const double theta = 2 * M_PI / num_cameras;
+
+  // Initial pose at angle 0, position (R, 0, 0), facing the center with Y-axis
+  // pointing down
+  const Pose3 init(Rot3::Ypr(M_PI_2, 0, -M_PI_2), {R, 0, 0});
+
+  // Delta rotation: rotate by -theta around Z-axis (counterclockwise movement)
+  Rot3 delta_rotation = Rot3::Ypr(0, -theta, 0);
+
+  // Delta translation in world frame
+  Vector3 delta_translation_world(R * (cos(theta) - 1), R * sin(theta), 0);
+
+  // Transform delta translation to local frame of the camera
+  Vector3 delta_translation_local =
+      init.rotation().inverse() * delta_translation_world;
+
+  // Define delta pose
+  const Pose3 delta(delta_rotation, delta_translation_local);
+
+  // Generate poses using createPoses
+  return createPoses(init, delta, num_cameras);
+}
+}  // namespace gtsam
diff --git a/examples/ViewGraphExample.cpp b/examples/ViewGraphExample.cpp
@@ -0,0 +1,136 @@
+/* ----------------------------------------------------------------------------
+
+ * GTSAM Copyright 2010, Georgia Tech Research Corporation,
+ * Atlanta, Georgia 30332-0415
+ * All Rights Reserved
+ * Authors: Frank Dellaert, et al. (see THANKS for the full author list)
+
+ * See LICENSE for the license information
+
+ * -------------------------------------------------------------------------- */
+
+/**
+ * @file    ViewGraphExample.cpp
+ * @brief   View-graph calibration on a simulated dataset, a la Sweeney 2015
+ * @author  Frank Dellaert
+ * @date    October 2024
+ */
+
+#include <gtsam/geometry/Cal3_S2.h>
+#include <gtsam/geometry/PinholeCamera.h>
+#include <gtsam/geometry/Point2.h>
+#include <gtsam/geometry/Point3.h>
+#include <gtsam/geometry/Pose3.h>
+#include <gtsam/inference/EdgeKey.h>
+#include <gtsam/nonlinear/LevenbergMarquardtOptimizer.h>
+#include <gtsam/nonlinear/NonlinearFactorGraph.h>
+#include <gtsam/nonlinear/Values.h>
+#include <gtsam/sfm/TransferFactor.h>
+
+#include <vector>
+
+#include "SFMdata.h"
+#include "gtsam/inference/Key.h"
+
+using namespace std;
+using namespace gtsam;
+
+/* ************************************************************************* */
+int main(int argc, char* argv[]) {
+  // Define the camera calibration parameters
+  Cal3_S2 K(50.0, 50.0, 0.0, 50.0, 50.0);
+
+  // Create the set of 8 ground-truth landmarks
+  vector<Point3> points = createPoints();
+
+  // Create the set of 4 ground-truth poses
+  vector<Pose3> poses = posesOnCircle(4, 30);
+
+  // Calculate ground truth fundamental matrices, 1 and 2 poses apart
+  auto F1 = FundamentalMatrix(K, poses[0].between(poses[1]), K);
+  auto F2 = FundamentalMatrix(K, poses[0].between(poses[2]), K);
+
+  // Simulate measurements from each camera pose
+  std::array<std::array<Point2, 8>, 4> p;
+  for (size_t i = 0; i < 4; ++i) {
+    PinholeCamera<Cal3_S2> camera(poses[i], K);
+    for (size_t j = 0; j < 8; ++j) {
+      p[i][j] = camera.project(points[j]);
+    }
+  }
+
+  // This section of the code is inspired by the work of Sweeney et al.
+  // [link](sites.cs.ucsb.edu/~holl/pubs/Sweeney-2015-ICCV.pdf) on view-graph
+  // calibration. The graph is made up of transfer factors that enforce the
+  // epipolar constraint between corresponding points across three views, as
+  // described in the paper. Rather than adding one ternary error term per point
+  // in a triplet, we add three binary factors for sparsity during optimization.
+  // In this version, we only include triplets between 3 successive cameras.
+  NonlinearFactorGraph graph;
+  using Factor = TransferFactor<FundamentalMatrix>;
+
+  for (size_t a = 0; a < 4; ++a) {
+    size_t b = (a + 1) % 4;  // Next camera
+    size_t c = (a + 2) % 4;  // Camera after next
+
+    // Vectors to collect tuples for each factor
+    std::vector<std::tuple<Point2, Point2, Point2>> tuples1, tuples2, tuples3;
+
+    // Collect data for the three factors
+    for (size_t j = 0; j < 8; ++j) {
+      tuples1.emplace_back(p[a][j], p[b][j], p[c][j]);
+      tuples2.emplace_back(p[a][j], p[c][j], p[b][j]);
+      tuples3.emplace_back(p[c][j], p[b][j], p[a][j]);
+    }
+
+    // Add transfer factors between views a, b, and c. Note that the EdgeKeys
+    // are crucial in performing the transfer in the right direction. We use
+    // exactly 8 unique EdgeKeys, corresponding to 8 unknown fundamental
+    // matrices we will optimize for.
+    graph.emplace_shared<Factor>(EdgeKey(a, c), EdgeKey(b, c), tuples1);
+    graph.emplace_shared<Factor>(EdgeKey(a, b), EdgeKey(b, c), tuples2);
+    graph.emplace_shared<Factor>(EdgeKey(a, c), EdgeKey(a, b), tuples3);
+  }
+
+  auto formatter = [](Key key) {
+    EdgeKey edge(key);
+    return (std::string)edge;
+  };
+
+  graph.print("Factor Graph:\n", formatter);
+
+  // Create a delta vector to perturb the ground truth
+  // We can't really go far before convergence becomes problematic :-(
+  Vector7 delta;
+  delta << 1, 2, 3, 4, 5, 6, 7;
+  delta *= 1e-5;
+
+  // Create the data structure to hold the initial estimate to the solution
+  Values initialEstimate;
+  for (size_t a = 0; a < 4; ++a) {
+    size_t b = (a + 1) % 4;  // Next camera
+    size_t c = (a + 2) % 4;  // Camera after next
+    initialEstimate.insert(EdgeKey(a, b), F1.retract(delta));
+    initialEstimate.insert(EdgeKey(a, c), F2.retract(delta));
+  }
+  initialEstimate.print("Initial Estimates:\n", formatter);
+    graph.printErrors(initialEstimate, "errors: ", formatter);
+
+  /* Optimize the graph and print results */
+  LevenbergMarquardtParams params;
+  params.setlambdaInitial(1000.0);  // Initialize lambda to a high value
+  params.setVerbosityLM("SUMMARY");
+  Values result =
+      LevenbergMarquardtOptimizer(graph, initialEstimate, params).optimize();
+
+  cout << "initial error = " << graph.error(initialEstimate) << endl;
+  cout << "final error = " << graph.error(result) << endl;
+
+  result.print("Final results:\n", formatter);
+
+  cout << "Ground Truth F1:\n" << F1.matrix() << endl;
+  cout << "Ground Truth F2:\n" << F2.matrix() << endl;
+
+  return 0;
+}
+/* ************************************************************************* */
diff --git a/gtsam/geometry/FundamentalMatrix.cpp b/gtsam/geometry/FundamentalMatrix.cpp
@@ -10,8 +10,8 @@
 namespace gtsam {
 
 //*************************************************************************
-Point2 Transfer(const Matrix3& Fca, const Point2& pa,  //
-                const Matrix3& Fcb, const Point2& pb) {
+Point2 EpipolarTransfer(const Matrix3& Fca, const Point2& pa,  //
+                        const Matrix3& Fcb, const Point2& pb) {
   // Create lines in camera a from projections of the other two cameras
   Vector3 line_a = Fca * Vector3(pa.x(), pa.y(), 1);
   Vector3 line_b = Fcb * Vector3(pb.x(), pb.y(), 1);
@@ -24,6 +24,7 @@ Point2 Transfer(const Matrix3& Fca, const Point2& pa,  //
 
   return intersectionPoint.head<2>();  // Return the 2D point
 }
+
 //*************************************************************************
 FundamentalMatrix::FundamentalMatrix(const Matrix3& F) {
   // Perform SVD
@@ -71,9 +72,7 @@ Matrix3 FundamentalMatrix::matrix() const {
 }
 
 void FundamentalMatrix::print(const std::string& s) const {
-  std::cout << s << "U:\n"
-            << U_.matrix() << "\ns: " << s_ << "\nV:\n"
-            << V_.matrix() << std::endl;
+  std::cout << s << matrix() << std::endl;
 }
 
 bool FundamentalMatrix::equals(const FundamentalMatrix& other,
@@ -98,20 +97,20 @@ FundamentalMatrix FundamentalMatrix::retract(const Vector& delta) const {
 }
 
 //*************************************************************************
-Matrix3 SimpleFundamentalMatrix::leftK() const {
+Matrix3 SimpleFundamentalMatrix::Ka() const {
   Matrix3 K;
   K << fa_, 0, ca_.x(), 0, fa_, ca_.y(), 0, 0, 1;
   return K;
 }
 
-Matrix3 SimpleFundamentalMatrix::rightK() const {
+Matrix3 SimpleFundamentalMatrix::Kb() const {
   Matrix3 K;
   K << fb_, 0, cb_.x(), 0, fb_, cb_.y(), 0, 0, 1;
   return K;
 }
 
 Matrix3 SimpleFundamentalMatrix::matrix() const {
-  return leftK().transpose().inverse() * E_.matrix() * rightK().inverse();
+  return Ka().transpose().inverse() * E_.matrix() * Kb().inverse();
 }
 
 void SimpleFundamentalMatrix::print(const std::string& s) const {