
Commit
basic active set lbfgs
enzbus committed Jun 7, 2024
1 parent 37f954a commit 4dc1066
Showing 2 changed files with 19 additions and 15 deletions.
21 changes: 12 additions & 9 deletions project_euromir/tests/lbfgs.py
@@ -208,7 +208,7 @@ def minimize_lbfgs(
     current_point[:] = initial_point

     # the function can also modify the current_point (projection)
-    current_loss, current_gradient[:] = loss_and_gradient_function(
+    current_loss, current_gradient[:], active_set = loss_and_gradient_function(
         current_point)

     for i in range(max_iters):
@@ -225,21 +225,24 @@
         if i == 0:
             scale = 1/np.linalg.norm(current_gradient)
         elif memory > 0:
-            scale = np.dot(past_steps[-1], past_grad_diffs[-1]) / np.dot(
-                past_grad_diffs[-1], past_grad_diffs[-1])
-
-        direction[:] = - lbfgs_multiply(
-            current_gradient=current_gradient,
-            past_steps=past_steps[memory-i:],
-            past_grad_diffs=past_grad_diffs[memory-i:],
+            scale = np.dot(past_steps[-1, active_set], past_grad_diffs[-1, active_set]) / np.dot(
+                past_grad_diffs[-1, active_set], past_grad_diffs[-1, active_set])
+
+        direction[:] = 0.
+        direction[active_set] = - lbfgs_multiply(
+            current_gradient=current_gradient[active_set],
+            past_steps=past_steps[memory-i:, active_set],
+            past_grad_diffs=past_grad_diffs[memory-i:, active_set],
             base_inverse_diagonal=scale)

+        assert np.all(direction[~active_set] == 0.)
+
         step_size = 1.
         for _ in range(max_ls):
             next_point[:] = current_point + step_size * direction

             # the function can also modify the next_point (projection)
-            next_loss, next_gradient[:] = loss_and_gradient_function(
+            next_loss, next_gradient[:], active_set = loss_and_gradient_function(
                 next_point)

             armijo, curvature = strong_wolfe(
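The change above restricts the quasi-Newton step to the coordinates reported as active by the objective callback: the direction is computed via lbfgs_multiply on the active coordinates only and left at zero elsewhere. Below is a minimal standalone sketch of that idea, assuming lbfgs_multiply implements the standard two-loop recursion; the name active_set_direction and the helper itself are illustrative, not part of this commit.

import numpy as np

def active_set_direction(gradient, past_steps, past_grad_diffs, active_set):
    """Illustrative: L-BFGS direction that is zero outside the active set."""
    g = gradient[active_set]
    S = past_steps[:, active_set]       # one row per stored step
    Y = past_grad_diffs[:, active_set]  # one row per stored gradient difference

    # scale of the base inverse Hessian, using only active coordinates
    scale = np.dot(S[-1], Y[-1]) / np.dot(Y[-1], Y[-1]) if len(S) else 1.0

    # standard L-BFGS two-loop recursion, run on the active coordinates
    q = g.copy()
    alphas = []
    for s, y in zip(S[::-1], Y[::-1]):          # newest pair first
        alpha = np.dot(s, q) / np.dot(s, y)
        alphas.append(alpha)
        q -= alpha * y
    r = scale * q
    for (s, y), alpha in zip(zip(S, Y), reversed(alphas)):  # oldest pair first
        beta = np.dot(y, r) / np.dot(s, y)
        r += (alpha - beta) * s

    direction = np.zeros_like(gradient)
    direction[active_set] = -r                  # inactive coordinates stay at 0
    return direction

With no stored pairs this sketch degenerates to (scaled) steepest descent on the active coordinates, which is the same degenerate case the zero-memory branch handles above.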
13 changes: 7 additions & 6 deletions project_euromir/tests/test_lbfgs.py
@@ -49,6 +49,7 @@ def test_base_direction(self):

         np.random.seed(m)
         current_gradient = np.random.randn(n)
+        active_set = np.ones(n, dtype=bool)

         past_grad_diffs = np.random.randn(m, n)
         past_steps = np.random.randn(m, n)
@@ -79,7 +80,8 @@ def loss_and_gradient_function(x):
             residual = A @ x - b
             loss = np.linalg.norm(residual) ** 2
             gradient = 2 * A.T @ residual
-            return loss, gradient
+            active_set = np.ones(n, dtype=bool)
+            return loss, gradient, active_set

         result = sp.optimize.fmin_l_bfgs_b(
             loss_and_gradient_function, x0=np.zeros(n))
@@ -112,18 +114,17 @@ def loss_and_gradient_function(x):
             loss = np.linalg.norm(residual) ** 2
             gradient = 2 * A.T @ residual
             # zero out gradient going towards constraint (not away from it)
-            gradient[(x==0.) & (gradient > 0.)] = 0.
-            return loss, gradient
-
-            # TODO: test whether better to zero out past geometry of zeroed out vars
+            inactive_set = (x==0.) & (gradient > 0.)
+            gradient[inactive_set] = 0.
+            return loss, gradient, ~inactive_set

         result = sp.optimize.fmin_l_bfgs_b(
             loss_and_gradient_function, x0=np.zeros(n), bounds=[[0, None]]*n)
         print(result)

         x = lbfgs.minimize_lbfgs(
             loss_and_gradient_function=loss_and_gradient_function,
-            initial_point=np.zeros(n), memory=0, max_iters=100, c_1=1e-3,
+            initial_point=np.zeros(n), memory=5, max_iters=100, c_1=1e-3,
             c_2=0.9, max_ls=20)

         self.assertGreaterEqual(np.min(x), 0.)
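For reference, a sketch of a callback satisfying the three-value contract (loss, gradient, active_set) that minimize_lbfgs expects after this commit, here for nonnegativity-constrained least squares as in the test; the in-place projection follows the "the function can also modify the point" comments in lbfgs.py, while the seed and problem sizes are illustrative.

import numpy as np

np.random.seed(0)
A = np.random.randn(20, 10)  # illustrative problem data
b = np.random.randn(20)

def loss_and_gradient_function(x):
    np.maximum(x, 0., out=x)  # in-place projection onto x >= 0
    residual = A @ x - b
    loss = np.linalg.norm(residual) ** 2
    gradient = 2 * A.T @ residual
    # coordinates pinned at the bound with the gradient pushing into it
    inactive_set = (x == 0.) & (gradient > 0.)
    gradient[inactive_set] = 0.
    return loss, gradient, ~inactive_set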
