trying ncg with new formulation
enzbus committed Jun 27, 2024
1 parent 4e49d60 commit e5ad081
Showing 7 changed files with 296 additions and 51 deletions.
19 changes: 13 additions & 6 deletions project_euromir/cvxpy_solver.py
@@ -52,13 +52,20 @@
    raise ImportError(
        "Can't use CVXPY interface if CVXPY is not installed!") from exc

# original
from project_euromir import solve
# without hsde
from project_euromir.solver_nohsde import solve
# original, with hsde
# from project_euromir import solve

# newton cg
# from project_euromir.solver_cg import solve
BFGS = False
NEWTON_CG = not BFGS

if BFGS:
    # without hsde
    from project_euromir.solver_nohsde import solve

if NEWTON_CG:

    # newton cg
    from project_euromir.solver_cg import solve


class Solver(ConicSolver):
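A quick usage sketch of the toggled interface (a hypothetical example, not from the repository: it assumes CVXPY's support for passing a custom ConicSolver instance to solve(), and uses an arbitrary toy LP):

import cvxpy as cp

from project_euromir.cvxpy_solver import Solver

x = cp.Variable(5)
problem = cp.Problem(cp.Minimize(cp.norm1(x)), [x >= -1., cp.sum(x) == 2.])
problem.solve(solver=Solver())  # with NEWTON_CG set, dispatches to solver_cg.solve
print(x.value)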
2 changes: 1 addition & 1 deletion project_euromir/lbfgs.py
@@ -51,7 +51,7 @@
'g': np.array([-1.]),
'ftol': np.array([1e-3]),
'gtol': np.array([0.9]),
'xtol': np.array([1e-5]), # TODO: figure out if this depends on scale
'xtol': np.array([1e-8]), # TODO: figure out if this depends on scale
'stpmin': np.array([0.]),
'stpmax': np.array([1000.]), # in lbfgsb this is set iteratively...
'isave': np.zeros(20, dtype=np.int32),
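(Context, as a hedged note: these arrays are the state passed to the MINPACK-2 dcsrch line search, whose xtol bounds the relative width of the bracketing interval before the search exits; 1e-8 therefore asks for a nearly exact line search. The TODO about scale dependence still stands.)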
186 changes: 186 additions & 0 deletions project_euromir/loss_no_hsde.py
@@ -0,0 +1,186 @@
# BSD 3-Clause License

# Copyright (c) 2024-, Enzo Busseti

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.

# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.

# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Define loss function(s), gradient(s), ...."""

import numpy as np
import scipy as sp

HSDE_SCALING_PENALTY = False

###
# The loss function used in the main loop
###
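# In symbols (a reading of the code below, not the author's notation): with
# s = b - A x and rows split between the zero cone and the nonnegative cone,
#
#   L(x, y) = || A^T y + c ||^2                              (dual residual)
#           + || s_zero ||^2 + || min(s_nonneg, 0) ||^2      (primal cone error)
#           + || min(y_nonneg, 0) ||^2                       (dual cone error)
#           + ( c^T x + b^T y )^2                            (squared duality gap)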

def create_workspace(m, n, zero):
    workspace = {}

    # preallocate some variables
    workspace['y_error'] = np.empty(m-zero, dtype=float)
    workspace['s_error'] = np.empty(m, dtype=float)
    workspace['dual_residual'] = np.empty(n, dtype=float)
    workspace['s'] = np.empty(m, dtype=float)
    workspace['gradient'] = np.empty(n+m, dtype=float)

    return workspace

# variable is xy
def loss_gradient(xy, m, n, zero, matrix, b, c, workspace):
    """Loss and gradient, used in the main solver loop and in the line search."""

    x = xy[:n]
    y = xy[n:]

    # zero cone dual variable is unconstrained
    workspace['y_error'][:] = np.minimum(y[zero:], 0.)

    # this must be all zeros
    workspace['dual_residual'][:] = matrix.T @ y + c

    # slacks
    workspace['s'][:] = -matrix @ x + b

    # slacks for zero cone must be zero
    workspace['s_error'][:zero] = workspace['s'][:zero]
    workspace['s_error'][zero:] = np.minimum(workspace['s'][zero:], 0.)

    # duality gap
    gap = c.T @ x + b.T @ y

    # loss
    loss = np.linalg.norm(workspace['y_error'])**2
    loss += np.linalg.norm(workspace['dual_residual'])**2
    loss += np.linalg.norm(workspace['s_error'])**2
    loss += gap**2
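    # note: the loss is zero exactly at a primal-dual optimal pair (zero
    # dual residual, slacks and duals in their cones, zero duality gap),
    # so driving it to zero solves the LP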

    # dual residual sqnorm
    workspace['gradient'][n:] = 2 * (matrix @ workspace['dual_residual'])

    # s_error sqnorm
    workspace['gradient'][:n] = -2 * (matrix.T @ workspace['s_error'])

    # y_error sqnorm
    workspace['gradient'][n+zero:] += 2 * workspace['y_error']

    # gap sq
    workspace['gradient'][:n] += (2 * gap) * c
    workspace['gradient'][n:] += (2 * gap) * b

    return loss, workspace['gradient']

def hessian(xy, m, n, zero, matrix, b, c, workspace):
    """Hessian, as a scipy LinearOperator, for the Newton-CG loop."""

    x = xy[:n]
    y = xy[n:]

    # zero cone dual variable is unconstrained
    workspace['y_error'][:] = np.minimum(y[zero:], 0.)

    # this must be all zeros
    workspace['dual_residual'][:] = matrix.T @ y + c

    # slacks
    workspace['s'][:] = -matrix @ x + b

    # slacks for zero cone must be zero
    workspace['s_error'][:zero] = workspace['s'][:zero]
    workspace['s_error'][zero:] = np.minimum(workspace['s'][zero:], 0.)
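    # hedged note: the residuals are piecewise linear, so the loss is
    # piecewise quadratic and its almost-everywhere Hessian is the
    # Gauss-Newton form 2 * J^T J; the masks in _matvec below zero out
    # the rows of J at which a min(., 0.) term is inactive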

    def _matvec(dxdy):
        result = np.empty_like(dxdy)
        dx = dxdy[:n]
        dy = dxdy[n:]

        # dual residual sqnorm
        result[n:] = 2 * (matrix @ (matrix.T @ dy))

        # s_error sqnorm
        s_mask = np.ones(m, dtype=float)
        s_mask[zero:] = workspace['s_error'][zero:] < 0.
        result[:n] = 2 * (matrix.T @ (s_mask * (matrix @ dx)))

        # y_error sqnorm
        y_mask = np.ones(m-zero, dtype=float)
        y_mask[:] = workspace['y_error'] < 0.
        result[n+zero:] += 2 * y_mask * dy[zero:]

        # gap
        constants = np.concatenate([c, b])
        result[:] += constants * (2 * (constants @ dxdy))

        return result

    return sp.sparse.linalg.LinearOperator(
        shape=(len(xy), len(xy)),
        matvec=_matvec
    )

def _densify(linear_operator):
    assert linear_operator.shape[0] == linear_operator.shape[1]
    result = np.empty(linear_operator.shape)
    identity = np.eye(result.shape[0])
    for i in range(len(identity)):
        result[:, i] = linear_operator.matvec(identity[:, i])
    return result


if __name__ == '__main__':

    from scipy.optimize import check_grad

    # create consts
    np.random.seed(0)
    m = 20
    n = 10
    zero = 5
    nonneg = 15
    matrix = np.random.randn(m, n)
    b = np.random.randn(m)
    c = np.random.randn(n)

    wks = create_workspace(m, n, zero)

    def my_loss(xy):
        return loss_gradient(xy, m, n, zero, matrix, b, c, wks)[0]

    def my_grad(xy):
        return np.copy(loss_gradient(xy, m, n, zero, matrix, b, c, wks)[1])

    def my_hessian(xy):
        return _densify(hessian(xy, m, n, zero, matrix, b, c, wks))

    print('\nCHECKING GRADIENT')
    for i in range(10):
        print(check_grad(my_loss, my_grad, np.random.randn(n+m)))

    print('\nCHECKING HESSIAN')
    for i in range(10):
        print(check_grad(my_grad, my_hessian, np.random.randn(n+m)))
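Since the new loss/gradient/Hessian triple is self-contained, it can also be driven end-to-end by SciPy's stock Newton-CG as a sanity check. A sketch under the same toy sizes as the __main__ block above (the solver itself uses the modified newton_cg.py instead, and a random instance need not reach loss zero):

import numpy as np
from scipy.optimize import minimize

from project_euromir.loss_no_hsde import create_workspace, hessian, loss_gradient

np.random.seed(0)
m, n, zero = 20, 10, 5
matrix = np.random.randn(m, n)
b = np.random.randn(m)
c = np.random.randn(n)
wks = create_workspace(m, n, zero)

result = minimize(
    lambda xy: loss_gradient(xy, m, n, zero, matrix, b, c, wks)[0],
    x0=np.zeros(n + m),
    # copy: loss_gradient returns a reference to the workspace buffer
    jac=lambda xy: np.copy(loss_gradient(xy, m, n, zero, matrix, b, c, wks)[1]),
    # rebuild the LinearOperator each call; wasteful but simple
    hessp=lambda xy, p: hessian(xy, m, n, zero, matrix, b, c, wks).matvec(p),
    method='Newton-CG', options={'maxiter': 1000})
print(result.fun)  # near zero only if this random instance is solvable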
6 changes: 3 additions & 3 deletions project_euromir/newton_cg.py
@@ -352,7 +352,7 @@ def terminate(warnflag, msg):

        for k2 in range(cg_maxiter):
            if np.add.reduce(np.abs(ri)) <= termcond:
                print(f'iter {k}, breaking CG loop at cgiter {k2} with termcond {termcond:.2e}')
                # print(f'iter {k}, breaking CG loop at cgiter {k2} with termcond {termcond:.2e}')
                # breakpoint()
                break
            if fhess is None:
@@ -369,7 +369,7 @@
            Ap = asarray(Ap).squeeze()  # get rid of matrices...
            curv = np.dot(psupi, Ap)
            if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * float64eps:
                print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}')
                # print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}')
                break
            elif curv < 0:
                if (i > 0):
@@ -414,7 +414,7 @@
            return terminate(5, "")
        update_l1norm = np.linalg.norm(update, ord=1)

        print(f'update_l1norm, {update_l1norm:.2e}')
        # print(f'update_l1norm, {update_l1norm:.2e}')

    else:
        if np.isnan(old_fval) or np.isnan(update_l1norm):
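For reference, the silenced prints traced the two early exits of the inner truncated-CG iteration: a small l1 residual against termcond, and vanishing or negative curvature along the search direction. A minimal standalone sketch of such a loop (not the project's code, which also handles the fhess/fhess_p variants and the modified curvature threshold above):

import numpy as np

def truncated_cg(hess_mv, grad, termcond, cg_maxiter):
    """Approximately solve H p = -grad, with H given implicitly by hess_mv."""
    p = np.zeros_like(grad)
    r = grad.copy()  # residual of H p + grad; equals grad at p = 0
    d = -r  # first search direction: steepest descent
    for _ in range(cg_maxiter):
        if np.abs(r).sum() <= termcond:
            break  # residual small enough, accept p
        Hd = hess_mv(d)
        curv = d @ Hd
        if curv <= 0.:  # zero or negative curvature: stop
            if not p.any():
                p = -grad  # nothing accumulated yet, take a gradient step
            break
        alpha = (r @ r) / curv
        p = p + alpha * d
        r_new = r + alpha * Hd
        d = -r_new + ((r_new @ r_new) / (r @ r)) * d
        r = r_new
    return p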
79 changes: 57 additions & 22 deletions project_euromir/solver_cg.py
@@ -33,11 +33,18 @@
import numpy as np
import scipy as sp

NOHSDE = True

from project_euromir import equilibrate
from project_euromir.lbfgs import minimize_lbfgs
from project_euromir.loss import (common_computation_main,
                                  create_workspace_main, gradient, hessian,
                                  loss)

if not NOHSDE:
    from project_euromir.loss import (common_computation_main,
                                      create_workspace_main, gradient, hessian,
                                      loss)
else:
    from project_euromir.loss_no_hsde import (create_workspace, loss_gradient, hessian)

from project_euromir.newton_cg import _epsilon, _minimize_newtoncg
from project_euromir.refinement import refine

@@ -69,30 +76,51 @@ def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10):
        [-c_transf.reshape(1, n), -b_transf.reshape(1, m), None],
        ]).tocsc()
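    # a hedged reading of the assembly above: Q is the skew-symmetric
    # HSDE embedding matrix
    #     [[  0    A^T  c ]
    #      [ -A     0   b ]
    #      [-c^T  -b^T  0 ]]
    # built in both paths, since v = Q @ u is still computed at the end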

    # prepare workspace
    workspace = create_workspace_main(Q, n, zero, nonneg)
    if not NOHSDE:

        # prepare workspace
        workspace = create_workspace_main(Q, n, zero, nonneg)

        # these functions should be unpacked inside newton-cg
        def separated_loss(u):
            common_computation_main(u, Q, n, zero, nonneg, workspace)
            return loss(u, Q, n, zero, nonneg, workspace)

        def separated_grad(u):
            common_computation_main(u, Q, n, zero, nonneg, workspace)
            return np.copy(gradient(u, Q, n, zero, nonneg, workspace))

    # these functions should be unpacked inside newton-cg
    def separated_loss(u):
        common_computation_main(u, Q, n, zero, nonneg, workspace)
        return loss(u, Q, n, zero, nonneg, workspace)
        def separated_hessian(u):
            common_computation_main(u, Q, n, zero, nonneg, workspace)
            return hessian(u, Q, n, zero, nonneg, workspace)

    def separated_grad(u):
        common_computation_main(u, Q, n, zero, nonneg, workspace)
        return np.copy(gradient(u, Q, n, zero, nonneg, workspace))
        x_0 = np.zeros(n+m+1)
        x_0[-1] = 1.

    def separated_hessian(u):
        common_computation_main(u, Q, n, zero, nonneg, workspace)
        return hessian(u, Q, n, zero, nonneg, workspace)
    else:

    x_0 = np.zeros(n+m+1)
    x_0[-1] = 1.
        workspace = create_workspace(m, n, zero)

        # these functions should be unpacked inside newton-cg
        def separated_loss(xy):
            return loss_gradient(
                xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)[0]

        def separated_grad(xy):
            return np.copy(loss_gradient(
                xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)[1])

        def separated_hessian(xy):
            return hessian(
                xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)

        x_0 = np.zeros(n+m)
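        # note: in this no-HSDE path the variable is plain (x, y) in
        # R^{n+m}, with no homogeneous tau/kappa components (the HSDE
        # branch above instead starts with tau = x_0[-1] = 1)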

    old_x = np.empty_like(x_0)

    def callback(current):
        print('current loss', current.fun)
        print('current kappa', current.x[-1])
        # print('current kappa', current.x[-1])
        # if current.fun < np.finfo(float).eps**2:
        #     raise StopIteration
        # if current.x[-1] < 1e-2:
Expand All @@ -110,20 +138,27 @@ def callback(current):
        jac=separated_grad,
        hess=separated_hessian,
        hessp=None,
        callback=callback,
        # callback=callback,
        xtol=0., #1e-5,
        eps=_epsilon, # unused
        maxiter=10000,
        # max_cg_iters=100,
        disp=1,
        return_all=False,
        c1=1e-4, c2=0.9)
    breakpoint()
    # breakpoint()
    print(f'NEWTON-CG took {time.time() - start:.3f} seconds')

    if not NOHSDE:
        # extract result
        u = result['x']
        assert u[-1] > 0, 'Certificates not yet implemented'
    u = result['x']
    assert u[-1] > 0, 'Certificates not yet implemented'
    else:
        xy = result['x']
        u = np.empty(n+m+1, dtype=float)
        u[:-1] = xy
        u[-1] = 1.

    # u /= u[-1]
    v = Q @ u
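    # reading off the blocks of Q assembled above: with u = (x, y, 1),
    #     v = Q @ u = (A^T y + c,  b - A x,  -(c^T x + b^T y)),
    # i.e. the dual residual, the primal slacks, and minus the duality gap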

