plugged in prototype solver without hsde

enzbus · Jun 26, 2024 · 8e4a7c2 · 8e4a7c2
1 parent 8e8e5de
commit 8e4a7c2
Show file tree

Hide file tree

Showing 7 changed files with 228 additions and 24 deletions.
diff --git a/project_euromir/cones.py b/project_euromir/cones.py
@@ -15,7 +15,9 @@
 
 import numpy as np
 
-from .config import NONUMBA
+# from .config import NONUMBA
+
+NONUMBA = True
 
 if not NONUMBA: # pragma: no cover
     import numba as nb

diff --git a/project_euromir/cvxpy_solver.py b/project_euromir/cvxpy_solver.py
@@ -52,10 +52,14 @@
     raise ImportError(
         "Can't use CVXPY interface if CVXPY is not installed!") from exc
 
-from project_euromir import solve as solve_lbfgs
-from project_euromir.solver_cg import solve as solve_ncg
+# original
+# from project_euromir import solve
 
-NCG = False
+# newton cg
+# from project_euromir.solver_cg import solve
+
+# without hsde
+from project_euromir.solver_nohsde import solve
 
 
 class Solver(ConicSolver):
@@ -77,7 +81,7 @@ def solve_via_data(
             solver_cache=None):
         """Main method."""
 
-        x_orig, y_orig, s_orig = (solve_ncg if NCG else solve_lbfgs)(
+        x_orig, y_orig, s_orig = solve(
             matrix=data['A'], b=data['b'], c=data['c'], zero=data['dims'].zero,
             nonneg=data['dims'].nonneg)
 

diff --git a/project_euromir/loss.py b/project_euromir/loss.py
@@ -31,30 +31,42 @@
 import numpy as np
 import scipy as sp
 
+HSDE_SCALING_PENALTY = False
+
 ###
 # The loss function used in the main loop
 ###
 
-def create_workspace_main(n, zero, nonneg):
+def create_workspace_main(Q, n, zero, nonneg):
     """Create workspace used in main loop."""
     workspace = {}
     len_var = n + zero + nonneg + 1
     workspace['u_conic_error'] = np.empty(nonneg+1, dtype=float)
     workspace['v_conic_error'] = np.empty(len_var, dtype=float)
     workspace['gradient'] = np.empty(len_var, dtype=float)
+    if HSDE_SCALING_PENALTY:
+        workspace['scaler_loss'] = None
+        workspace['scaler'] = Q[-1].todense().A1
+        workspace['scaler'][-1] = 1.
     return workspace
 
 def common_computation_main(u, Q, n, zero, nonneg, workspace):
     """Do common computation, in workspace, for input u."""
     workspace['u_conic_error'][:] = np.minimum(u[n+zero:], 0.)
     workspace['v_conic_error'][:] = Q @ u
+    if HSDE_SCALING_PENALTY:
+        workspace['v_m_1'] = workspace['v_conic_error'][-1]
     workspace['v_conic_error'][n+zero:] = np.minimum(
         workspace['v_conic_error'][n+zero:], 0.)
 
 def loss(u, Q, n, zero, nonneg, workspace):
     """Compute loss from workspace."""
-    return (np.linalg.norm(workspace['u_conic_error'])**2
+    result = (np.linalg.norm(workspace['u_conic_error'])**2
         + np.linalg.norm(workspace['v_conic_error'])**2)
+    if HSDE_SCALING_PENALTY:
+        workspace['scaler_loss'] = (workspace['scaler'] @ u - 1)**2
+        result += workspace['scaler_loss']
+    return result
 
 def gradient(u, Q, n, zero, nonneg, workspace):
     """Compute gradient from workspace.
@@ -63,6 +75,8 @@ def gradient(u, Q, n, zero, nonneg, workspace):
     """
     workspace['gradient'][:] = 2 * (Q.T @ workspace['v_conic_error'])
     workspace['gradient'][n+zero:] += 2 * workspace['u_conic_error']
+    if HSDE_SCALING_PENALTY:
+        workspace['gradient'][:] += 2 * workspace['scaler'] * workspace['scaler_loss']
     return workspace['gradient']
 
 def hessian(u, Q, n, zero, nonneg, workspace):
@@ -74,6 +88,8 @@ def _matvec(myvar):
         tmp[n+zero:] *= (workspace['v_conic_error'][n+zero:] < 0)
         tmp = Q.T @ tmp
         tmp[n+zero:] += (workspace['u_conic_error'] < 0) * myvar[n+zero:]
+        if HSDE_SCALING_PENALTY:
+            tmp[:] += workspace['scaler'] * (workspace['scaler'] @ myvar)
         return 2 * tmp
 
     return sp.sparse.linalg.LinearOperator(
@@ -212,7 +228,7 @@ def _rmatvec(myvar):
         [-c.reshape(1, n), -b.reshape(1, m), None],
         ]).tocsc()
 
-    workspace = create_workspace_main(n, zero, nonneg)
+    workspace = create_workspace_main(Q, n, zero, nonneg)
     u = np.random.randn(m+n+1)
     common_computation_main(u, Q, n, zero, nonneg, workspace)
 

diff --git a/project_euromir/newton_cg.py b/project_euromir/newton_cg.py
@@ -388,10 +388,9 @@ def terminate(warnflag, msg):
             dri0 = dri1          # update np.dot(ri,ri) for next time.
         else:
             # curvature keeps increasing, bail out
-            pass
-            #msg = ("Warning: CG iterations didn't converge. The Hessian is not "
-            #       "positive definite.")
-            #return terminate(3, msg)
+            msg = ("Warning: CG iterations didn't converge. The Hessian is not "
+                  "positive definite.")
+            return terminate(3, msg)
 
         pk = xsupi  # search direction is solution to system.
         gfk = -b    # gradient at xk

diff --git a/project_euromir/refinement.py b/project_euromir/refinement.py
@@ -34,7 +34,7 @@
 import scipy as sp
 
 
-def refine(z, matrix, b, c, zero, nonneg):
+def refine(z, matrix, b, c, zero, nonneg, max_iters=None):
     """All Python for now, will call all the rest.
 
     Equilibration is not done here, data must be already transformed.
@@ -78,7 +78,8 @@ def project(variable):
     # call LSQR
     start = time.time()
     result = sp.sparse.linalg.lsqr(
-        DR, residual, atol=0., btol=0., iter_lim=Q.shape[0]*2)
+        DR, residual, atol=0., btol=0.,
+        iter_lim=(Q.shape[0]*2) if max_iters is None else max_iters)
     print('LSQR result[1:-1]', result[1:-1])
     print('LSQR took', time.time() - start)
     dz = result[0]

diff --git a/project_euromir/solver_cg.py b/project_euromir/solver_cg.py
@@ -70,7 +70,7 @@ def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10):
         ]).tocsc()
 
     # prepare workspace
-    workspace = create_workspace_main(n, zero, nonneg)
+    workspace = create_workspace_main(Q, n, zero, nonneg)
 
     # these functions should be unpacked inside newton-cg
     def separated_loss(u):
@@ -93,13 +93,13 @@ def separated_hessian(u):
     def callback(current):
         print('current loss', current.fun)
         print('current kappa', current.x[-1])
-        if current.fun < np.finfo(float).eps**2:
-            raise StopIteration
-        if current.x[-1] < 1e-2:
-            current.x[:] = old_x
-            raise StopIteration
-            # current['x'] /= current.x[-1]
-        old_x[:] = current.x
+        # if current.fun < np.finfo(float).eps**2:
+        #     raise StopIteration
+        # if current.x[-1] < 1e-2:
+        #     current.x[:] = old_x
+        #     raise StopIteration
+        #     # current['x'] /= current.x[-1]
+        # old_x[:] = current.x
 
     start = time.time()
     # call newton-CG, implementation from scipy with modified termination
@@ -114,10 +114,10 @@ def callback(current):
         xtol=0., #1e-5,
         eps=_epsilon, # unused
         maxiter=10000,
-        max_cg_iters=100,
+        # max_cg_iters=100,
         disp=1,
         return_all=False,
-        c1=1e-4, c2=0.1)
+        c1=1e-4, c2=0.9)
     breakpoint()
     print(f'NEWTON-CG took {time.time() - start:.3f} seconds')
 

diff --git a/project_euromir/solver_nohsde.py b/project_euromir/solver_nohsde.py
@@ -0,0 +1,182 @@
+# BSD 3-Clause License
+
+# Copyright (c) 2024-, Enzo Busseti
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from
+#    this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""Solver main function, will be unpacked and call all the rest."""
+
+import time
+
+import numpy as np
+import scipy as sp
+
+from project_euromir import equilibrate
+from project_euromir.lbfgs import minimize_lbfgs
+from project_euromir.refinement import refine
+
+DEBUG = False
+if DEBUG:
+    import matplotlib.pyplot as plt
+
+
+def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10):
+    "Main function."
+
+    # some shape checking
+    n = len(c)
+    m = len(b)
+    assert matrix.shape == (m, n)
+    assert zero + nonneg == m
+
+    # equilibration
+    d, e, sigma, rho, matrix_transf, b_transf, c_transf = \
+    equilibrate.hsde_ruiz_equilibration(
+            matrix, b, c, dimensions={
+                'zero': zero, 'nonneg': nonneg, 'second_order': ()},
+            max_iters=25)
+
+    # define (loss, gradient) function; this is for LPs only
+
+    # preallocate some variables
+    y_error = np.empty(m-zero, dtype=float)
+    s_error = np.empty(m, dtype=float)
+    dual_residual = np.empty(n, dtype=float)
+    s = np.empty(m, dtype=float)
+    gradient = np.empty(n+m, dtype=float)
+
+    # variable is xy
+    def loss_gradient(xy):
+        """Function for LBFGS loop, used in line search as well."""
+
+        x = xy[:n]
+        y = xy[n:]
+
+        # zero cone dual variable is unconstrained
+        y_error[:] = np.minimum(y[zero:], 0.)
+
+        # this must be all zeros
+        dual_residual[:] = matrix_transf.T @ y + c_transf
+
+        # slacks
+        s[:] = -matrix_transf @ x + b_transf
+
+        # slacks for zero cone must be zero
+        s_error[:zero] = s[:zero]
+        s_error[zero:] = np.minimum(s[zero:], 0.)
+
+        # duality gap
+        gap = c_transf.T @ x + b_transf.T @ y
+
+        # loss
+        loss = np.linalg.norm(y_error)**2
+        loss += np.linalg.norm(dual_residual)**2
+        loss += np.linalg.norm(s_error)**2
+        loss += gap**2
+
+        # return loss
+
+        # dual residual sqnorm
+        gradient[n:] = 2 * (matrix_transf @ dual_residual)
+
+        # s_error sqnorm
+        gradient[:n] = -2 * (matrix_transf.T @ s_error)
+
+        # y_error sqnorm
+        gradient[n+zero:] += 2 * y_error
+
+        # gap sq
+        gradient[:n] += (2 * gap) * c_transf
+        gradient[n:] += (2 * gap) * b_transf
+
+        return loss, gradient
+
+    def callback(xy):
+        loss, _ = loss_gradient(xy)
+        # loss = loss_gradient(xy)
+        print(loss)
+
+    # initial variable, can pass initial guess
+    x_0 = np.zeros(n+m)
+
+    # call LBFGS
+    start = time.time()
+    lbfgs_result = sp.optimize.fmin_l_bfgs_b(
+        loss_gradient,
+        x0=x_0,
+        m=lbfgs_memory,
+        # approx_grad=True,
+        maxfun=1e10,
+        factr=0.,
+        pgtol=0.,
+        callback=callback if DEBUG else None,
+        maxiter=1e10)
+    # print LBFGS stats
+    function_value = lbfgs_result[1]
+    print('LOSS AT THE END OF LBFGS', function_value)
+    stats = lbfgs_result[2]
+    stats.pop('grad')
+    print('LBFGS stats', stats)
+    result_variable = lbfgs_result[0]
+
+    print('LBFGS took', time.time() - start)
+
+    # refinement, still based on hsde
+    Q = sp.sparse.bmat([
+        [None, matrix_transf.T, c_transf.reshape(n, 1)],
+        [-matrix_transf, None, b_transf.reshape(m, 1)],
+        [-c_transf.reshape(1, n), -b_transf.reshape(1, m), None],
+        ]).tocsc()
+    u = np.empty(n+m+1)
+
+    # create hsde variables
+    u[:-1] = result_variable
+    u[-1] = 1.
+    v = Q @ u
+
+    # refine
+    for i in range(5):
+        u, v = refine(
+            z=u-v, matrix=matrix_transf, b=b_transf, c=c_transf, zero=zero,
+            nonneg=nonneg, max_iters=100)
+
+    # Transform back into problem format
+    u1, u2, u3 = u[:n], u[n:n+m], u[-1]
+    v2, v3 = v[n:n+m], v[-1]
+
+    if v3 > u3:
+        raise NotImplementedError('Certificates not yet implemented.')
+
+    # Apply HSDE scaling
+    x = u1 / u3
+    y = u2 / u3
+    s = v2 / u3
+
+    # invert Ruiz scaling, copied from other repo
+    x_orig =  e * x / sigma
+    y_orig = d * y / rho
+    s_orig = (s/d) / sigma
+
+    return x_orig, y_orig, s_orig