re-activated presolve;

new plan is to pull in SuiteSparseQR, which is also GPL; ideally Minamide will work, or something similar
enzbus · Aug 10, 2024 · 9b9eab9 · 9b9eab9
1 parent 093fb86
commit 9b9eab9
Show file tree

Hide file tree

Showing 3 changed files with 62 additions and 10 deletions.
diff --git a/project_euromir/direction_calculator.py b/project_euromir/direction_calculator.py
@@ -326,6 +326,7 @@ def __init__(
         self._residual_function = residual_function
         self._derivative_residual_function = derivative_residual_function
         self._warm_start = warm_start
+        self._statistics = {'HESSIAN_MATMULS':  0}
 
     def _inner_function(self, derivative_residual, residual, current_gradient):
         """In order to replace with LSMR below."""
@@ -348,8 +349,10 @@ def get_direction(
         """
         residual = self._residual_function(current_point)
         derivative_residual = self._derivative_residual_function(current_point)
+        # breakpoint()
         solution, n_iters = self._inner_function(
             derivative_residual, residual, current_gradient)
+        self._statistics['HESSIAN_MATMULS'] += n_iters
         logger.info(
             '%s calculated direction with error norm %.2e, while the input'
             ' gradient had norm %.2e, in %d iterations',
@@ -367,8 +370,9 @@ class LSMRLevenbergMarquardt(LSQRLevenbergMarquardt):
 
     def _inner_function(self, derivative_residual, residual, current_gradient):
         """Just the call to the iterative solver."""
+        breakpoint()
         result = sp.sparse.linalg.lsmr(
-            derivative_residual, -residual, x0=self._x0, # damp=1e-06, # seems
+            derivative_residual, -residual, x0=self._x0, damp=1e-06, # seems
             # that up to about 1e-6 performance is not affected
             atol=min(0.5, np.linalg.norm(current_gradient)), btol=0.)
         return result[0], result[2] # solution, number of iterations
diff --git a/project_euromir/minamide.py b/project_euromir/minamide.py
@@ -76,6 +76,7 @@ def _matvec(dy):
 
 
 from .direction_calculator import DirectionCalculator, _densify
+from .minresQLP import MinresQLP
 
 
 class MinamideTest(DirectionCalculator):
@@ -96,8 +97,8 @@ def get_direction(self, current_point, current_gradient):
         # regularization is needed
 
         # linear operators
-        hx = self._hessian_x_nogap(current_point[:self._n])#  + np.eye(self._n) * REGU
-        hy = self._hessian_y_nogap(current_point[self._n:])#  + np.eye(self._m) * REGU
+        hx = self._hessian_x_nogap(current_point[:self._n])  #+ np.eye(self._n) * 1e-12
+        hy = self._hessian_y_nogap(current_point[self._n:])  #+ np.eye(self._m) * 1e-12
 
         # minamide notation
 
@@ -126,6 +127,10 @@ def _splus_matvec(array):
                 np.linalg.lstsq(hx_dense, array[:self._n], rcond=None)[0],
                 np.linalg.lstsq(hy_dense, array[self._n:], rcond=None)[0]
             ])
+        Splusdense = sp.sparse.linalg.LinearOperator(
+            shape = (self._n + self._m, self._n + self._m),
+            matvec = _splus_matvec
+        )
 
         # def _splus_matvec(array):
         #     return np.concatenate([
@@ -135,16 +140,31 @@ def _splus_matvec(array):
 
         # def _splus_matvec(array):
         #     return np.concatenate([
-        #         sp.sparse.linalg.minres(hx, array[:self._n], rtol=min(0.5, np.linalg.norm(array[:self._n])**0.5))[0],
-        #         sp.sparse.linalg.minres(hy, array[self._n:], rtol=min(0.5, np.linalg.norm(array[self._n:])**0.5))[0]
+        #         sp.sparse.linalg.minres(hx, array[:self._n], shift=1-8, rtol=1e-16)[0],
+        #         sp.sparse.linalg.minres(hy, array[self._n:], shift=1e-8, rtol=1e-16)[0]
         #     ])
 
+        def _splus_matvec(array):
+            return np.concatenate([
+                MinresQLP(hx, array[:self._n], rtol=1e-64, maxit=10000)[0],
+                MinresQLP(hy, array[self._n:], rtol=1e-64, maxit=10000)[0]
+            ])
+
         Splus = sp.sparse.linalg.LinearOperator(
             shape = (self._n + self._m, self._n + self._m),
             matvec = _splus_matvec
         )
 
         Splusphi = Splus @ phi
+        #import matplotlib.pyplot as plt
+        Splusphi_dense = Splusdense @ phi
+        print('NORM DIFF SPLUS PHI vs dense',
+            np.linalg.norm(Splusphi-Splusphi_dense))
+
+        Splusphi = Splusphi_dense
+
+        #plt.plot(Splusphi); plt.plot(Splusphi_dense); plt.show()
+        #breakpoint()
         Tphi = phi - S @ Splusphi
 
         # breakpoint()
@@ -172,6 +192,12 @@ def _splus_matvec(array):
             #pinv_rebuilt = Splus - np.outer(Splusphi, Splusphi) / (
             #    1 + phi @ Splusphi)
             result = Splus @ rhs
+            result_dense = Splusdense @ rhs
+            print('NORM DIFF result vs dense',
+                np.linalg.norm(result-result_dense))
+            #plt.plot(result); plt.plot(result_dense); plt.show()
+            result = result_dense
+            #breakpoint()
             result -= Splusphi * ((Splusphi @ rhs) /  (1 + phi @ Splusphi))
             return result + FINAL_REGU * rhs
 
@@ -189,6 +215,12 @@ def _splus_matvec(array):
 
             # multiply by Splus
             result = Splus @ result
+            result_dense = Splusdense @ rhs
+            #plt.plot(result); plt.plot(result_dense); plt.show()
+            #breakpoint()
+            print('NORM DIFF result vs dense',
+                np.linalg.norm(result-result_dense))
+            result = result_dense
 
             # multiply by left
             result = result - Tphi * ((phi @ result) / (phi @ Tphi))

diff --git a/project_euromir/solver_new.py b/project_euromir/solver_new.py
@@ -41,6 +41,7 @@
 
 logger = logging.getLogger(__name__)
 
+QR_PRESOLVE = True
 
 def solve(matrix, b, c, zero, nonneg,
         # xy = None, # need to import logic for equilibration
@@ -67,6 +68,15 @@ def solve(matrix, b, c, zero, nonneg,
                 'zero': zero, 'nonneg': nonneg, 'second_order': ()},
             max_iters=25)
 
+    if QR_PRESOLVE:
+        q, r = np.linalg.qr(
+            np.vstack([matrix_transf.todense(), c_transf.reshape((1, n))]))
+        matrix_transf = q[:-1].A
+        c_transf = q[-1].A1
+        sigma_qr = np.linalg.norm(
+            b_transf) / np.mean(np.linalg.norm(matrix_transf, axis=1))
+        b_transf = b_transf/sigma_qr
+
     workspace = create_workspace(m, n, zero)
 
     def _local_loss(xy):
@@ -122,10 +132,6 @@ def _local_derivative_residual(xy):
         loss_function=_local_loss,
         max_iters=1000)
 
-    direction_calculator = MinamideTest(
-        b=b_transf, c=c_transf, h_x_nogap=_local_hessian_x_nogap,
-        h_y_nogap=_local_hessian_y_nogap)
-
     direction_calculator = WarmStartedCGNewton(
         # warm start causes issues if null space changes b/w iterations
         hessian_function=_local_hessian,
@@ -186,6 +192,9 @@ def _local_derivative_residual(xy):
     #     #rtol_termination=nocedal_wright_termination,
     #     #max_cg_iters=None,
     #     )
+    # direction_calculator = MinamideTest(
+    #     b=b_transf, c=c_transf, h_x_nogap=_local_hessian_x_nogap,
+    #     h_y_nogap=_local_hessian_y_nogap)
 
     _start = time.time()
     # extra_iters=5
@@ -198,7 +207,9 @@ def _local_derivative_residual(xy):
             'Iteration %d, current loss %.2e, current inf norm grad %.2e',
             newton_iterations, loss_xy, np.max(np.abs(grad_xy)))
 
-        if np.linalg.norm(grad_xy)/(n+m) < np.finfo(float).eps:
+        if ((np.linalg.norm(grad_xy)/(n+m) < np.finfo(float).eps)) and \
+                loss_xy < 1e-24:
+                # temporary fix, this termination needs to be softer and smarter
             logger.info('Converged in %d iterations.', newton_iterations)
         #     extra_iters -= 1
         # if extra_iters == 0:
@@ -228,6 +239,7 @@ def _local_derivative_residual(xy):
             'Certificates not yet implemented.')
 
     print('Newton-CG loop took %.2e seconds' % (time.time() - _start ))
+    print('Loss at end of Newton-CG loop: %.2e' % loss_xy)
     print('Newton-CG iterations', newton_iterations)
     print('DirectionCalculator statistics', direction_calculator.statistics)
     print('LineSearcher statistics', line_searcher.statistics)
@@ -260,6 +272,10 @@ def _local_derivative_residual(xy):
     y = u2 / u3
     s = v2 / u3
 
+    if QR_PRESOLVE:
+        x = np.linalg.solve(r, x) * sigma_qr
+        s *= sigma_qr
+
     # invert Ruiz scaling, copied from other repo
     x_orig = e * x / sigma
     y_orig = d * y / rho