From e5ad081878f87a57ac9e49ec9ef5d1d1d23d7207 Mon Sep 17 00:00:00 2001 From: Enzo Busseti Date: Thu, 27 Jun 2024 11:39:46 +0400 Subject: [PATCH] trying ncg with new formulation --- project_euromir/cvxpy_solver.py | 19 ++- project_euromir/lbfgs.py | 2 +- project_euromir/loss_no_hsde.py | 186 +++++++++++++++++++++++++++ project_euromir/newton_cg.py | 6 +- project_euromir/solver_cg.py | 79 ++++++++---- project_euromir/solver_nohsde.py | 51 +++++--- project_euromir/tests/test_solver.py | 4 +- 7 files changed, 296 insertions(+), 51 deletions(-) create mode 100644 project_euromir/loss_no_hsde.py diff --git a/project_euromir/cvxpy_solver.py b/project_euromir/cvxpy_solver.py index 4110568..e80dfbe 100644 --- a/project_euromir/cvxpy_solver.py +++ b/project_euromir/cvxpy_solver.py @@ -52,13 +52,20 @@ raise ImportError( "Can't use CVXPY interface if CVXPY is not installed!") from exc -# original -from project_euromir import solve -# without hsde -from project_euromir.solver_nohsde import solve +# original, with hsde +# from project_euromir import solve -# newton cg -# from project_euromir.solver_cg import solve +BFGS = False +NEWTON_CG = not BFGS + +if BFGS: + # without hsde + from project_euromir.solver_nohsde import solve + +if NEWTON_CG: + + # newton cg + from project_euromir.solver_cg import solve class Solver(ConicSolver): diff --git a/project_euromir/lbfgs.py b/project_euromir/lbfgs.py index b75df2d..c99c5da 100644 --- a/project_euromir/lbfgs.py +++ b/project_euromir/lbfgs.py @@ -51,7 +51,7 @@ 'g': np.array([-1.]), 'ftol': np.array([1e-3]), 'gtol': np.array([0.9]), - 'xtol': np.array([1e-5]), # TODO: figure out if this depends on scale + 'xtol': np.array([1e-8]), # TODO: figure out if this depends on scale 'stpmin': np.array([0.]), 'stpmax': np.array([1000.]), # in lbfgsb this is set iteratively... 'isave': np.zeros(20, dtype=np.int32), diff --git a/project_euromir/loss_no_hsde.py b/project_euromir/loss_no_hsde.py new file mode 100644 index 0000000..d1b30b2 --- /dev/null +++ b/project_euromir/loss_no_hsde.py @@ -0,0 +1,186 @@ +# BSD 3-Clause License + +# Copyright (c) 2024-, Enzo Busseti + +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: + +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. + +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. + +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +"""Define loss function(s), gradient(s), ....""" + +import numpy as np +import scipy as sp + +HSDE_SCALING_PENALTY = False + +### +# The loss function used in the main loop +### + +def create_workspace(m, n, zero): + workspace = {} + + # preallocate some variables + workspace['y_error'] = np.empty(m-zero, dtype=float) + workspace['s_error'] = np.empty(m, dtype=float) + workspace['dual_residual'] = np.empty(n, dtype=float) + workspace['s'] = np.empty(m, dtype=float) + workspace['gradient'] = np.empty(n+m, dtype=float) + + return workspace + +# variable is xy +def loss_gradient(xy, m, n, zero, matrix, b, c, workspace): + """Function for LBFGS loop, used in line search as well.""" + + x = xy[:n] + y = xy[n:] + + # zero cone dual variable is unconstrained + workspace['y_error'][:] = np.minimum(y[zero:], 0.) + + # this must be all zeros + workspace['dual_residual'][:] = matrix.T @ y + c + + # slacks + workspace['s'][:] = -matrix @ x + b + + # slacks for zero cone must be zero + workspace['s_error'][:zero] = workspace['s'][:zero] + workspace['s_error'][zero:] = np.minimum(workspace['s'][zero:], 0.) + + # duality gap + gap = c.T @ x + b.T @ y + + # loss + loss = np.linalg.norm(workspace['y_error'])**2 + loss += np.linalg.norm(workspace['dual_residual'])**2 + loss += np.linalg.norm(workspace['s_error'])**2 + loss += gap**2 + + # dual residual sqnorm + workspace['gradient'][n:] = 2 * (matrix @ workspace['dual_residual']) + + # s_error sqnorm + workspace['gradient'][:n] = -2 * (matrix.T @ workspace['s_error']) + + # y_error sqnorm + workspace['gradient'][n+zero:] += 2 * workspace['y_error'] + + # gap sq + workspace['gradient'][:n] += (2 * gap) * c + workspace['gradient'][n:] += (2 * gap) * b + + return loss, workspace['gradient'] + +def hessian(xy, m, n, zero, matrix, b, c, workspace): + """Hessian to use inside LBFGS loop.""" + + locals().update(workspace) + + x = xy[:n] + y = xy[n:] + + # zero cone dual variable is unconstrained + workspace['y_error'][:] = np.minimum(y[zero:], 0.) + + # this must be all zeros + workspace['dual_residual'][:] = matrix.T @ y + c + + # slacks + workspace['s'][:] = -matrix @ x + b + + # slacks for zero cone must be zero + workspace['s_error'][:zero] = workspace['s'][:zero] + workspace['s_error'][zero:] = np.minimum(workspace['s'][zero:], 0.) + + def _matvec(dxdy): + result = np.empty_like(dxdy) + dx = dxdy[:n] + dy = dxdy[n:] + + # dual residual sqnorm + result[n:] = 2 * (matrix @ (matrix.T @ dy)) + + # s_error sqnorm + s_mask = np.ones(m, dtype=float) + s_mask[zero:] = workspace['s_error'][zero:] < 0. + result[:n] = 2 * (matrix.T @ (s_mask * (matrix @ dx))) + + # y_error sqnorm + y_mask = np.ones(m-zero, dtype=float) + y_mask[:] = workspace['y_error'] < 0. + result[n+zero:] += 2 * y_mask * dy[zero:] + + # gap + constants = np.concatenate([c, b]) + result[:] += constants * (2 * (constants @ dxdy)) + + return result + + return sp.sparse.linalg.LinearOperator( + shape=(len(xy), len(xy)), + matvec=_matvec + ) + +def _densify(linear_operator): + assert linear_operator.shape[0] == linear_operator.shape[1] + result = np.empty(linear_operator.shape) + identity = np.eye(result.shape[0]) + for i in range(len(identity)): + result[:, i] = linear_operator.matvec(identity[:, i]) + return result + + +if __name__ == '__main__': + + from scipy.optimize import check_grad + + # create consts + np.random.seed(0) + m = 20 + n = 10 + zero = 5 + nonneg = 15 + matrix = np.random.randn(m, n) + b = np.random.randn(m) + c = np.random.randn(n) + + wks = create_workspace(m, n, zero) + + def my_loss(xy): + return loss_gradient(xy, m, n, zero, matrix, b, c, wks)[0] + + def my_grad(xy): + return np.copy(loss_gradient(xy, m, n, zero, matrix, b, c, wks)[1]) + + def my_hessian(xy): + return _densify(hessian(xy, m, n, zero, matrix, b, c, wks)) + + print('\nCHECKING GRADIENT') + for i in range(10): + print(check_grad(my_loss, my_grad, np.random.randn(n+m))) + + print('\nCHECKING HESSIAN') + for i in range(10): + print(check_grad(my_grad, my_hessian, np.random.randn(n+m))) diff --git a/project_euromir/newton_cg.py b/project_euromir/newton_cg.py index fd01f22..1b89458 100644 --- a/project_euromir/newton_cg.py +++ b/project_euromir/newton_cg.py @@ -352,7 +352,7 @@ def terminate(warnflag, msg): for k2 in range(cg_maxiter): if np.add.reduce(np.abs(ri)) <= termcond: - print(f'iter {k}, breaking CG loop at cgiter {k2} with termcond {termcond:.2e}') + # print(f'iter {k}, breaking CG loop at cgiter {k2} with termcond {termcond:.2e}') # breakpoint() break if fhess is None: @@ -369,7 +369,7 @@ def terminate(warnflag, msg): Ap = asarray(Ap).squeeze() # get rid of matrices... curv = np.dot(psupi, Ap) if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * float64eps: - print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}') + # print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}') break elif curv < 0: if (i > 0): @@ -414,7 +414,7 @@ def terminate(warnflag, msg): return terminate(5, "") update_l1norm = np.linalg.norm(update, ord=1) - print(f'update_l1norm, {update_l1norm:.2e}') + # print(f'update_l1norm, {update_l1norm:.2e}') else: if np.isnan(old_fval) or np.isnan(update_l1norm): diff --git a/project_euromir/solver_cg.py b/project_euromir/solver_cg.py index 55d0103..7f0fcf6 100644 --- a/project_euromir/solver_cg.py +++ b/project_euromir/solver_cg.py @@ -33,11 +33,18 @@ import numpy as np import scipy as sp +NOHSDE = True + from project_euromir import equilibrate from project_euromir.lbfgs import minimize_lbfgs -from project_euromir.loss import (common_computation_main, - create_workspace_main, gradient, hessian, - loss) + +if not NOHSDE: + from project_euromir.loss import (common_computation_main, + create_workspace_main, gradient, hessian, + loss) +else: + from project_euromir.loss_no_hsde import (create_workspace, loss_gradient, hessian) + from project_euromir.newton_cg import _epsilon, _minimize_newtoncg from project_euromir.refinement import refine @@ -69,30 +76,51 @@ def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10): [-c_transf.reshape(1, n), -b_transf.reshape(1, m), None], ]).tocsc() - # prepare workspace - workspace = create_workspace_main(Q, n, zero, nonneg) + if not NOHSDE: + + # prepare workspace + workspace = create_workspace_main(Q, n, zero, nonneg) + + # these functions should be unpacked inside newton-cg + def separated_loss(u): + common_computation_main(u, Q, n, zero, nonneg, workspace) + return loss(u, Q, n, zero, nonneg, workspace) + + def separated_grad(u): + common_computation_main(u, Q, n, zero, nonneg, workspace) + return np.copy(gradient(u, Q, n, zero, nonneg, workspace)) - # these functions should be unpacked inside newton-cg - def separated_loss(u): - common_computation_main(u, Q, n, zero, nonneg, workspace) - return loss(u, Q, n, zero, nonneg, workspace) + def separated_hessian(u): + common_computation_main(u, Q, n, zero, nonneg, workspace) + return hessian(u, Q, n, zero, nonneg, workspace) - def separated_grad(u): - common_computation_main(u, Q, n, zero, nonneg, workspace) - return np.copy(gradient(u, Q, n, zero, nonneg, workspace)) + x_0 = np.zeros(n+m+1) + x_0[-1] = 1. - def separated_hessian(u): - common_computation_main(u, Q, n, zero, nonneg, workspace) - return hessian(u, Q, n, zero, nonneg, workspace) + else: - x_0 = np.zeros(n+m+1) - x_0[-1] = 1. + workspace = create_workspace(m, n, zero) + + # these functions should be unpacked inside newton-cg + def separated_loss(xy): + return loss_gradient( + xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)[0] + + def separated_grad(xy): + return np.copy(loss_gradient( + xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)[1]) + + def separated_hessian(xy): + return hessian( + xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace) + + x_0 = np.zeros(n+m) old_x = np.empty_like(x_0) def callback(current): print('current loss', current.fun) - print('current kappa', current.x[-1]) + # print('current kappa', current.x[-1]) # if current.fun < np.finfo(float).eps**2: # raise StopIteration # if current.x[-1] < 1e-2: @@ -110,7 +138,7 @@ def callback(current): jac=separated_grad, hess=separated_hessian, hessp=None, - callback=callback, + # callback=callback, xtol=0., #1e-5, eps=_epsilon, # unused maxiter=10000, @@ -118,12 +146,19 @@ def callback(current): disp=1, return_all=False, c1=1e-4, c2=0.9) - breakpoint() + # breakpoint() print(f'NEWTON-CG took {time.time() - start:.3f} seconds') + if not NOHSDE: # extract result - u = result['x'] - assert u[-1] > 0, 'Certificates not yet implemented' + u = result['x'] + assert u[-1] > 0, 'Certificates not yet implemented' + else: + xy = result['x'] + u = np.empty(n+m+1, dtype=float) + u[:-1] = xy + u[-1] = 1. + # u /= u[-1] v = Q @ u diff --git a/project_euromir/solver_nohsde.py b/project_euromir/solver_nohsde.py index 2901420..e86a42a 100644 --- a/project_euromir/solver_nohsde.py +++ b/project_euromir/solver_nohsde.py @@ -42,8 +42,11 @@ if DEBUG: import matplotlib.pyplot as plt +QR_PRESOLVE = False -def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10): +def solve( + matrix, b, c, zero, nonneg, lbfgs_memory=10, refinement_iters=50, + refinement_lsqr_iters=100): "Main function." # some shape checking @@ -52,12 +55,20 @@ def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10): assert matrix.shape == (m, n) assert zero + nonneg == m - # equilibration - d, e, sigma, rho, matrix_transf, b_transf, c_transf = \ - equilibrate.hsde_ruiz_equilibration( - matrix, b, c, dimensions={ - 'zero': zero, 'nonneg': nonneg, 'second_order': ()}, - max_iters=25) + if QR_PRESOLVE: + q, r = np.linalg.qr(np.vstack([matrix.todense(), c.reshape((1, n))])) + matrix_transf = q[:-1].A + c_transf = q[-1].A1 + sigma = np.linalg.norm(b) + b_transf = b/sigma + + else: + # equilibration + d, e, sigma, rho, matrix_transf, b_transf, c_transf = \ + equilibrate.hsde_ruiz_equilibration( + matrix, b, c, dimensions={ + 'zero': zero, 'nonneg': nonneg, 'second_order': ()}, + max_iters=25) # define (loss, gradient) function; this is for LPs only @@ -198,13 +209,13 @@ def callback(xy): # callback=_callback if DEBUG else None, memory=lbfgs_memory, max_iters=int(1e10), - c_1=1e-3, - c_2=.1, + #c_1=1e-3, + #c_2=.1, # ls_backtrack=.5, # ls_forward=1.1, pgtol=0., #PGTOL, - hessian_approximator=hessian, - hessian_cg_iters=20, + # hessian_approximator=hessian, + #hessian_cg_iters=20, # use_active_set=ACTIVE_SET, max_ls=100) @@ -224,10 +235,10 @@ def callback(xy): v = Q @ u # refine - for i in range(5): + for i in range(refinement_iters): u, v = refine( z=u-v, matrix=matrix_transf, b=b_transf, c=c_transf, zero=zero, - nonneg=nonneg, max_iters=100) + nonneg=nonneg, max_iters=refinement_lsqr_iters) # Transform back into problem format u1, u2, u3 = u[:n], u[n:n+m], u[-1] @@ -241,9 +252,15 @@ def callback(xy): y = u2 / u3 s = v2 / u3 - # invert Ruiz scaling, copied from other repo - x_orig = e * x / sigma - y_orig = d * y / rho - s_orig = (s/d) / sigma + if QR_PRESOLVE: + x_orig = np.linalg.solve(r, x) * sigma + y_orig = y + s_orig = s * sigma + + else: + # invert Ruiz scaling, copied from other repo + x_orig = e * x / sigma + y_orig = d * y / rho + s_orig = (s/d) / sigma return x_orig, y_orig, s_orig diff --git a/project_euromir/tests/test_solver.py b/project_euromir/tests/test_solver.py index 020b408..fca35e6 100644 --- a/project_euromir/tests/test_solver.py +++ b/project_euromir/tests/test_solver.py @@ -46,13 +46,13 @@ def test_simple(self): for seed in range(10): print('\n\nEXPERIMENT', seed+1) np.random.seed(seed) - m, n = 21, 20 + m, n = 41, 40 x = cp.Variable(n) A = np.random.randn(m, n) b = np.random.randn(m) objective = cp.norm1(A @ x - b) d = np.random.randn(n, 2) - constraints = [cp.abs(x) <= .25, x @ d == 1.,] + constraints = [cp.abs(x) <= .15, x @ d == 2.,] def _get_stats(): constr_errs = [