From e5ad081878f87a57ac9e49ec9ef5d1d1d23d7207 Mon Sep 17 00:00:00 2001
From: Enzo Busseti <enzo.busseti@me.com>
Date: Thu, 27 Jun 2024 11:39:46 +0400
Subject: [PATCH] trying ncg with new formulation

---
 project_euromir/cvxpy_solver.py      |  19 ++-
 project_euromir/lbfgs.py             |   2 +-
 project_euromir/loss_no_hsde.py      | 186 +++++++++++++++++++++++++++
 project_euromir/newton_cg.py         |   6 +-
 project_euromir/solver_cg.py         |  79 ++++++++----
 project_euromir/solver_nohsde.py     |  51 +++++---
 project_euromir/tests/test_solver.py |   4 +-
 7 files changed, 296 insertions(+), 51 deletions(-)
 create mode 100644 project_euromir/loss_no_hsde.py

diff --git a/project_euromir/cvxpy_solver.py b/project_euromir/cvxpy_solver.py
index 4110568..e80dfbe 100644
--- a/project_euromir/cvxpy_solver.py
+++ b/project_euromir/cvxpy_solver.py
@@ -52,13 +52,20 @@
     raise ImportError(
         "Can't use CVXPY interface if CVXPY is not installed!") from exc
 
-# original
-from project_euromir import solve
-# without hsde
-from project_euromir.solver_nohsde import solve
+# original, with hsde
+# from project_euromir import solve
 
-# newton cg
-# from project_euromir.solver_cg import solve
+BFGS = False
+NEWTON_CG = not BFGS
+
+if BFGS:
+    # without hsde
+    from project_euromir.solver_nohsde import solve
+
+if NEWTON_CG:
+
+    # newton cg
+    from project_euromir.solver_cg import solve
 
 
 class Solver(ConicSolver):
diff --git a/project_euromir/lbfgs.py b/project_euromir/lbfgs.py
index b75df2d..c99c5da 100644
--- a/project_euromir/lbfgs.py
+++ b/project_euromir/lbfgs.py
@@ -51,7 +51,7 @@
     'g': np.array([-1.]),
     'ftol': np.array([1e-3]),
     'gtol': np.array([0.9]),
-    'xtol': np.array([1e-5]), # TODO: figure out if this depends on scale
+    'xtol': np.array([1e-8]), # TODO: figure out if this depends on scale
     'stpmin': np.array([0.]),
     'stpmax': np.array([1000.]), # in lbfgsb this is set iteratively...
     'isave': np.zeros(20, dtype=np.int32),
diff --git a/project_euromir/loss_no_hsde.py b/project_euromir/loss_no_hsde.py
new file mode 100644
index 0000000..d1b30b2
--- /dev/null
+++ b/project_euromir/loss_no_hsde.py
@@ -0,0 +1,186 @@
+# BSD 3-Clause License
+
+# Copyright (c) 2024-, Enzo Busseti
+
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions and the following disclaimer.
+
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions and the following disclaimer in the documentation
+#    and/or other materials provided with the distribution.
+
+# 3. Neither the name of the copyright holder nor the names of its
+#    contributors may be used to endorse or promote products derived from
+#    this software without specific prior written permission.
+
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""Define loss function(s), gradient(s), ...."""
+
+import numpy as np
+import scipy as sp
+
+HSDE_SCALING_PENALTY = False
+
+###
+# The loss function used in the main loop
+###
+
+def create_workspace(m, n, zero):
+    workspace = {}
+
+    # preallocate some variables
+    workspace['y_error'] = np.empty(m-zero, dtype=float)
+    workspace['s_error'] = np.empty(m, dtype=float)
+    workspace['dual_residual'] = np.empty(n, dtype=float)
+    workspace['s'] = np.empty(m, dtype=float)
+    workspace['gradient'] = np.empty(n+m, dtype=float)
+
+    return workspace
+
+# variable is xy
+def loss_gradient(xy, m, n, zero, matrix, b, c, workspace):
+    """Function for LBFGS loop, used in line search as well."""
+
+    x = xy[:n]
+    y = xy[n:]
+
+    # zero cone dual variable is unconstrained
+    workspace['y_error'][:] = np.minimum(y[zero:], 0.)
+
+    # this must be all zeros
+    workspace['dual_residual'][:] = matrix.T @ y + c
+
+    # slacks
+    workspace['s'][:] = -matrix @ x + b
+
+    # slacks for zero cone must be zero
+    workspace['s_error'][:zero] = workspace['s'][:zero]
+    workspace['s_error'][zero:] = np.minimum(workspace['s'][zero:], 0.)
+
+    # duality gap
+    gap = c.T @ x + b.T @ y
+
+    # loss
+    loss = np.linalg.norm(workspace['y_error'])**2
+    loss += np.linalg.norm(workspace['dual_residual'])**2
+    loss += np.linalg.norm(workspace['s_error'])**2
+    loss += gap**2
+
+    # dual residual sqnorm
+    workspace['gradient'][n:] = 2 * (matrix @ workspace['dual_residual'])
+
+    # s_error sqnorm
+    workspace['gradient'][:n] = -2 * (matrix.T @ workspace['s_error'])
+
+    # y_error sqnorm
+    workspace['gradient'][n+zero:] += 2 * workspace['y_error']
+
+    # gap sq
+    workspace['gradient'][:n] += (2 * gap) * c
+    workspace['gradient'][n:] += (2 * gap) * b
+
+    return loss, workspace['gradient']
+
+def hessian(xy, m, n, zero, matrix, b, c, workspace):
+    """Hessian to use inside LBFGS loop."""
+
+    locals().update(workspace)
+
+    x = xy[:n]
+    y = xy[n:]
+
+    # zero cone dual variable is unconstrained
+    workspace['y_error'][:] = np.minimum(y[zero:], 0.)
+
+    # this must be all zeros
+    workspace['dual_residual'][:] = matrix.T @ y + c
+
+    # slacks
+    workspace['s'][:] = -matrix @ x + b
+
+    # slacks for zero cone must be zero
+    workspace['s_error'][:zero] = workspace['s'][:zero]
+    workspace['s_error'][zero:] = np.minimum(workspace['s'][zero:], 0.)
+
+    def _matvec(dxdy):
+        result = np.empty_like(dxdy)
+        dx = dxdy[:n]
+        dy = dxdy[n:]
+
+        # dual residual sqnorm
+        result[n:] = 2 * (matrix @ (matrix.T @ dy))
+
+        # s_error sqnorm
+        s_mask = np.ones(m, dtype=float)
+        s_mask[zero:] = workspace['s_error'][zero:] < 0.
+        result[:n] = 2 * (matrix.T @ (s_mask * (matrix @ dx)))
+
+        # y_error sqnorm
+        y_mask = np.ones(m-zero, dtype=float)
+        y_mask[:] = workspace['y_error'] < 0.
+        result[n+zero:] += 2 * y_mask * dy[zero:]
+
+        # gap
+        constants = np.concatenate([c, b])
+        result[:] += constants * (2 * (constants @ dxdy))
+
+        return result
+
+    return sp.sparse.linalg.LinearOperator(
+        shape=(len(xy), len(xy)),
+        matvec=_matvec
+    )
+
+def _densify(linear_operator):
+    assert linear_operator.shape[0] == linear_operator.shape[1]
+    result = np.empty(linear_operator.shape)
+    identity = np.eye(result.shape[0])
+    for i in range(len(identity)):
+        result[:, i] = linear_operator.matvec(identity[:, i])
+    return result
+
+
+if __name__ == '__main__':
+
+    from scipy.optimize import check_grad
+
+    # create consts
+    np.random.seed(0)
+    m = 20
+    n = 10
+    zero = 5
+    nonneg = 15
+    matrix = np.random.randn(m, n)
+    b = np.random.randn(m)
+    c = np.random.randn(n)
+
+    wks = create_workspace(m, n, zero)
+
+    def my_loss(xy):
+        return loss_gradient(xy, m, n, zero, matrix, b, c, wks)[0]
+
+    def my_grad(xy):
+        return np.copy(loss_gradient(xy, m, n, zero, matrix, b, c, wks)[1])
+
+    def my_hessian(xy):
+        return _densify(hessian(xy, m, n, zero, matrix, b, c, wks))
+
+    print('\nCHECKING GRADIENT')
+    for i in range(10):
+        print(check_grad(my_loss, my_grad, np.random.randn(n+m)))
+
+    print('\nCHECKING HESSIAN')
+    for i in range(10):
+        print(check_grad(my_grad, my_hessian, np.random.randn(n+m)))
diff --git a/project_euromir/newton_cg.py b/project_euromir/newton_cg.py
index fd01f22..1b89458 100644
--- a/project_euromir/newton_cg.py
+++ b/project_euromir/newton_cg.py
@@ -352,7 +352,7 @@ def terminate(warnflag, msg):
 
         for k2 in range(cg_maxiter):
             if np.add.reduce(np.abs(ri)) <= termcond:
-                print(f'iter {k}, breaking CG loop at cgiter {k2} with termcond {termcond:.2e}')
+                # print(f'iter {k}, breaking CG loop at cgiter {k2} with termcond {termcond:.2e}')
                 # breakpoint()
                 break
             if fhess is None:
@@ -369,7 +369,7 @@ def terminate(warnflag, msg):
             Ap = asarray(Ap).squeeze()  # get rid of matrices...
             curv = np.dot(psupi, Ap)
             if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * float64eps:
-                print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}')
+                # print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}')
                 break
             elif curv < 0:
                 if (i > 0):
@@ -414,7 +414,7 @@ def terminate(warnflag, msg):
             return terminate(5, "")
         update_l1norm = np.linalg.norm(update, ord=1)
 
-        print(f'update_l1norm, {update_l1norm:.2e}')
+        # print(f'update_l1norm, {update_l1norm:.2e}')
 
     else:
         if np.isnan(old_fval) or np.isnan(update_l1norm):
diff --git a/project_euromir/solver_cg.py b/project_euromir/solver_cg.py
index 55d0103..7f0fcf6 100644
--- a/project_euromir/solver_cg.py
+++ b/project_euromir/solver_cg.py
@@ -33,11 +33,18 @@
 import numpy as np
 import scipy as sp
 
+NOHSDE = True
+
 from project_euromir import equilibrate
 from project_euromir.lbfgs import minimize_lbfgs
-from project_euromir.loss import (common_computation_main,
-                                  create_workspace_main, gradient, hessian,
-                                  loss)
+
+if not NOHSDE:
+    from project_euromir.loss import (common_computation_main,
+                                      create_workspace_main, gradient, hessian,
+                                      loss)
+else:
+    from project_euromir.loss_no_hsde import (create_workspace,  loss_gradient, hessian)
+
 from project_euromir.newton_cg import _epsilon, _minimize_newtoncg
 from project_euromir.refinement import refine
 
@@ -69,30 +76,51 @@ def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10):
         [-c_transf.reshape(1, n), -b_transf.reshape(1, m), None],
         ]).tocsc()
 
-    # prepare workspace
-    workspace = create_workspace_main(Q, n, zero, nonneg)
+    if not NOHSDE:
+
+        # prepare workspace
+        workspace = create_workspace_main(Q, n, zero, nonneg)
+
+        # these functions should be unpacked inside newton-cg
+        def separated_loss(u):
+            common_computation_main(u, Q, n, zero, nonneg, workspace)
+            return loss(u, Q, n, zero, nonneg, workspace)
+
+        def separated_grad(u):
+            common_computation_main(u, Q, n, zero, nonneg, workspace)
+            return np.copy(gradient(u, Q, n, zero, nonneg, workspace))
 
-    # these functions should be unpacked inside newton-cg
-    def separated_loss(u):
-        common_computation_main(u, Q, n, zero, nonneg, workspace)
-        return loss(u, Q, n, zero, nonneg, workspace)
+        def separated_hessian(u):
+            common_computation_main(u, Q, n, zero, nonneg, workspace)
+            return hessian(u, Q, n, zero, nonneg, workspace)
 
-    def separated_grad(u):
-        common_computation_main(u, Q, n, zero, nonneg, workspace)
-        return np.copy(gradient(u, Q, n, zero, nonneg, workspace))
+        x_0 = np.zeros(n+m+1)
+        x_0[-1] = 1.
 
-    def separated_hessian(u):
-        common_computation_main(u, Q, n, zero, nonneg, workspace)
-        return hessian(u, Q, n, zero, nonneg, workspace)
+    else:
 
-    x_0 = np.zeros(n+m+1)
-    x_0[-1] = 1.
+        workspace = create_workspace(m, n, zero)
+
+        # these functions should be unpacked inside newton-cg
+        def separated_loss(xy):
+            return loss_gradient(
+                xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)[0]
+
+        def separated_grad(xy):
+            return np.copy(loss_gradient(
+                xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)[1])
+
+        def separated_hessian(xy):
+            return hessian(
+                xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)
+
+        x_0 = np.zeros(n+m)
 
     old_x = np.empty_like(x_0)
 
     def callback(current):
         print('current loss', current.fun)
-        print('current kappa', current.x[-1])
+        # print('current kappa', current.x[-1])
         # if current.fun < np.finfo(float).eps**2:
         #     raise StopIteration
         # if current.x[-1] < 1e-2:
@@ -110,7 +138,7 @@ def callback(current):
         jac=separated_grad,
         hess=separated_hessian,
         hessp=None,
-        callback=callback,
+        # callback=callback,
         xtol=0., #1e-5,
         eps=_epsilon, # unused
         maxiter=10000,
@@ -118,12 +146,19 @@ def callback(current):
         disp=1,
         return_all=False,
         c1=1e-4, c2=0.9)
-    breakpoint()
+    # breakpoint()
     print(f'NEWTON-CG took {time.time() - start:.3f} seconds')
 
+    if not NOHSDE:
     # extract result
-    u = result['x']
-    assert u[-1] > 0, 'Certificates not yet implemented'
+        u = result['x']
+        assert u[-1] > 0, 'Certificates not yet implemented'
+    else:
+        xy = result['x']
+        u = np.empty(n+m+1, dtype=float)
+        u[:-1] = xy
+        u[-1] = 1.
+
     # u /= u[-1]
     v = Q @ u
 
diff --git a/project_euromir/solver_nohsde.py b/project_euromir/solver_nohsde.py
index 2901420..e86a42a 100644
--- a/project_euromir/solver_nohsde.py
+++ b/project_euromir/solver_nohsde.py
@@ -42,8 +42,11 @@
 if DEBUG:
     import matplotlib.pyplot as plt
 
+QR_PRESOLVE = False
 
-def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10):
+def solve(
+    matrix, b, c, zero, nonneg, lbfgs_memory=10, refinement_iters=50,
+    refinement_lsqr_iters=100):
     "Main function."
 
     # some shape checking
@@ -52,12 +55,20 @@ def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10):
     assert matrix.shape == (m, n)
     assert zero + nonneg == m
 
-    # equilibration
-    d, e, sigma, rho, matrix_transf, b_transf, c_transf = \
-    equilibrate.hsde_ruiz_equilibration(
-            matrix, b, c, dimensions={
-                'zero': zero, 'nonneg': nonneg, 'second_order': ()},
-            max_iters=25)
+    if QR_PRESOLVE:
+        q, r = np.linalg.qr(np.vstack([matrix.todense(), c.reshape((1, n))]))
+        matrix_transf = q[:-1].A
+        c_transf = q[-1].A1
+        sigma = np.linalg.norm(b)
+        b_transf = b/sigma
+
+    else:
+        # equilibration
+        d, e, sigma, rho, matrix_transf, b_transf, c_transf = \
+        equilibrate.hsde_ruiz_equilibration(
+                matrix, b, c, dimensions={
+                    'zero': zero, 'nonneg': nonneg, 'second_order': ()},
+                max_iters=25)
 
     # define (loss, gradient) function; this is for LPs only
 
@@ -198,13 +209,13 @@ def callback(xy):
             # callback=_callback if DEBUG else None,
             memory=lbfgs_memory,
             max_iters=int(1e10),
-            c_1=1e-3,
-            c_2=.1,
+            #c_1=1e-3,
+            #c_2=.1,
             # ls_backtrack=.5,
             # ls_forward=1.1,
             pgtol=0., #PGTOL,
-            hessian_approximator=hessian,
-            hessian_cg_iters=20,
+            # hessian_approximator=hessian,
+            #hessian_cg_iters=20,
             # use_active_set=ACTIVE_SET,
             max_ls=100)
 
@@ -224,10 +235,10 @@ def callback(xy):
     v = Q @ u
 
     # refine
-    for i in range(5):
+    for i in range(refinement_iters):
         u, v = refine(
             z=u-v, matrix=matrix_transf, b=b_transf, c=c_transf, zero=zero,
-            nonneg=nonneg, max_iters=100)
+            nonneg=nonneg, max_iters=refinement_lsqr_iters)
 
     # Transform back into problem format
     u1, u2, u3 = u[:n], u[n:n+m], u[-1]
@@ -241,9 +252,15 @@ def callback(xy):
     y = u2 / u3
     s = v2 / u3
 
-    # invert Ruiz scaling, copied from other repo
-    x_orig =  e * x / sigma
-    y_orig = d * y / rho
-    s_orig = (s/d) / sigma
+    if QR_PRESOLVE:
+        x_orig = np.linalg.solve(r, x) * sigma
+        y_orig = y
+        s_orig = s * sigma
+
+    else:
+        # invert Ruiz scaling, copied from other repo
+        x_orig =  e * x / sigma
+        y_orig = d * y / rho
+        s_orig = (s/d) / sigma
 
     return x_orig, y_orig, s_orig
diff --git a/project_euromir/tests/test_solver.py b/project_euromir/tests/test_solver.py
index 020b408..fca35e6 100644
--- a/project_euromir/tests/test_solver.py
+++ b/project_euromir/tests/test_solver.py
@@ -46,13 +46,13 @@ def test_simple(self):
         for seed in range(10):
             print('\n\nEXPERIMENT', seed+1)
             np.random.seed(seed)
-            m, n = 21, 20
+            m, n = 41, 40
             x = cp.Variable(n)
             A = np.random.randn(m, n)
             b = np.random.randn(m)
             objective = cp.norm1(A @ x - b)
             d = np.random.randn(n, 2)
-            constraints = [cp.abs(x) <= .25, x @ d == 1.,]
+            constraints = [cp.abs(x) <= .15, x @ d == 2.,]
 
             def _get_stats():
                 constr_errs = [