diff --git a/project_euromir/loss_no_hsde.py b/project_euromir/loss_no_hsde.py
index cac072c..3fc44d9 100644
--- a/project_euromir/loss_no_hsde.py
+++ b/project_euromir/loss_no_hsde.py
@@ -37,13 +37,22 @@ def create_workspace(m, n, zero, soc=()):
 
     return workspace
 
-def project(vec, result, zero, nonneg, soc):
+def project(vec, result, nonneg, soc):
     """Project. We skip zero cone. This is -Pi_K(-vec), not sure if minus
     outside is needed.
     """
     result[:nonneg] = np.minimum(vec[:nonneg], 0.)
 
+    # SOC
+    soc_cursor = 0
+    for soc_cone in soc:
+        second_order_project(
+            z=-vec[nonneg+soc_cursor:nonneg+soc_cursor+soc_cone],
+            result=result[nonneg+soc_cursor:nonneg+soc_cursor+soc_cone])
+        soc_cursor += soc_cone
+    result[nonneg:nonneg+soc_cursor] *= -1.
+
 def dual_project(vec, result, zero, nonneg, soc):
     """Project on dual cone.
 
@@ -52,7 +61,16 @@ def dual_project(vec, result, zero, nonneg, soc):
     result[:zero] = vec[:zero]
     result[zero:zero+nonneg] = np.minimum(vec[zero:zero+nonneg], 0.)
 
-def make_derivative_project(vec, result, zero, nonneg, soc):
+    # SOC
+    soc_cursor = 0
+    for soc_cone in soc:
+        second_order_project(
+            z=-vec[zero+nonneg+soc_cursor:zero+nonneg+soc_cursor+soc_cone],
+            result=result[zero+nonneg+soc_cursor:zero+nonneg+soc_cursor+soc_cone])
+        soc_cursor += soc_cone
+    result[zero+nonneg:zero+nonneg+soc_cursor] *= -1.
+
+def make_derivative_project(vec, result, nonneg, soc):
     """Make DPi_K. We skip zero cone.
     """
     y_mask = np.ones(nonneg, dtype=float)
@@ -84,19 +102,7 @@ def loss_gradient(xy, m, n, zero, nonneg, matrix, b, c, workspace, soc=()):
 
     # zero cone dual variable is unconstrained
     # workspace['y_error'][:nonneg] = np.minimum(y[zero:zero+nonneg], 0.)
-    project(vec=y[zero:], result=workspace['y_error'], zero=zero, nonneg=nonneg, soc=soc)
-    # dual_cone_project(
-    #     z_cone=-y[zero:],
-    #     result=workspace['y_error'],
-    #     dimensions=(None, 0, nonneg, soc))
-    # workspace['y_error'] = -workspace['y_error']
-    # soc_cursor = 0
-    # for soc_cone in soc:
-    #     second_order_project(
-    #         z=-y[zero+nonneg+soc_cursor:zero+nonneg+soc_cursor+soc_cone],
-    #         result=workspace['y_error'][nonneg+soc_cursor:nonneg+soc_cursor+soc_cone])
-    #     workspace['y_error'][nonneg+soc_cursor:nonneg+soc_cursor+soc_cone] *= -1.
-    #     soc_cursor += soc_cone
+    project(vec=y[zero:], result=workspace['y_error'], nonneg=nonneg, soc=soc)
 
     # this must be all zeros
     workspace['dual_residual'][:] = matrix.T @ y + c
@@ -154,7 +160,7 @@ def hessian(
 
     # zero cone dual variable is unconstrained
     # workspace['y_error'][:nonneg] = np.minimum(y[zero:zero+nonneg], 0.)
-    project(vec=y[zero:], result=workspace['y_error'], zero=zero, nonneg=nonneg, soc=soc)
+    project(vec=y[zero:], result=workspace['y_error'], nonneg=nonneg, soc=soc)
 
     # this must be all zeros
     workspace['dual_residual'][:] = matrix.T @ y + c
@@ -181,7 +187,7 @@ def hessian(
         nonneg=nonneg, soc=soc)
 
     dpi_y = make_derivative_project(
-        vec=y, result=workspace['y_error'], zero=zero, nonneg=nonneg, soc=soc)
+        vec=y, result=workspace['y_error'], nonneg=nonneg, soc=soc)
 
     def _matvec(dxdy):
         result = np.empty_like(dxdy)
diff --git a/project_euromir/refinement_no_hsde.py b/project_euromir/refinement_no_hsde.py
new file mode 100644
index 0000000..e6cc7ca
--- /dev/null
+++ b/project_euromir/refinement_no_hsde.py
@@ -0,0 +1,182 @@
+# Copyright 2024 Enzo Busseti
+#
+# This file is part of Project Euromir.
+#
+# Project Euromir is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# Project Euromir is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Project Euromir. If not, see <https://www.gnu.org/licenses/>.
+"""Define residual and Dresidual for use by refinement loop."""
+
+import numpy as np
+import scipy as sp
+
+from .loss_no_hsde import _densify_also_nonsquare
+
+
+def residual(xz, m, n, zero, nonneg, matrix, b, c, soc=()):
+    """Residual function for refinement."""
+
+    x = xz[:n]
+    z = xz[n:]
+
+    # projection
+    y = np.empty_like(z)
+    y[:zero] = z[:zero]
+    y[zero:zero+nonneg] = np.maximum(z[zero:zero+nonneg], 0.)
+    s = y - z
+
+    # print(y)
+    # print(s)
+
+    # primal residual
+    primal_residual = matrix @ x - b + s
+
+    # dual residual
+    dual_residual = c + matrix.T @ y
+
+    # duality gap
+    gap = c.T @ x + b.T @ y
+
+    # build the full residual by concatenating residuals
+    res = np.zeros(n + m + 1, dtype=float)
+    res[:m] = primal_residual
+    res[m:m+n] = dual_residual
+    res[-1] = gap
+
+    return res
+
+def Dresidual_densefull(xz, m, n, zero, nonneg, matrix, b, c, soc=()):
+    """Dense Jacobian for testing."""
+
+    jacobian = np.zeros((n+m+1, n+m), dtype=float)
+
+    assert len(soc) == 0
+
+    # Pi derivatives
+    y_mask = np.ones(m, dtype=float)
+    y_mask[zero:zero+nonneg] = xz[n+zero:n+zero+nonneg] >= 0
+    s_mask = y_mask - 1.
+    # print(y_mask)
+    # print(s_mask)
+
+    # pri res
+    jacobian[:m, :n] = matrix
+    jacobian[:m, n:] = np.diag(s_mask)
+
+    # dua res
+    jacobian[m:m+n, n:] = matrix.T @ np.diag(y_mask)
+
+    # gap
+    jacobian[-1, :n] = c
+    jacobian[-1, n:] = y_mask * b
+
+    return jacobian
+
+
+def Dresidual(xy, m, n, zero, nonneg, matrix, b, c, soc=()):
+    """Linear operator to matrix multiply the refinement residual derivative."""
+
+    x = xy[:n]
+    y = xy[n:]
+
+    # zero cone dual variable is unconstrained
+    y_mask = (y[zero:] <= 0.) * 1.
+
+    # slacks
+    s = -matrix @ x + b
+
+    # slacks for zero cone must be zero
+    s_mask = np.ones_like(s)
+    s_mask[zero:] = s[zero:] <= 0.
+
+    # concatenation of primal and dual costs
+    pridua = np.concatenate([c, b])
+
+    def _matvec(dxy):
+
+        # decompose direction
+        dx = dxy[:n]
+        dy = dxy[n:]
+
+        # compose result
+        dr = np.empty(n + 2 * m - zero + 1, dtype=float)
+        dr[:m-zero] = y_mask * dy[zero:]
+        dr[m-zero:m+n-zero] = matrix.T @ dy
+        dr[-1-m:-1] = s_mask * (-(matrix @ dx))
+        dr[-1] = pridua @ dxy
+
+        return dr
+
+    def _rmatvec(dr):
+
+        # decompose direction
+        dy_err = dr[:m-zero]
+        ddua_res = dr[m-zero:m+n-zero]
+        ds_err = dr[-1-m:-1]
+        dgap = dr[-1]
+
+        # compose result
+        dxy = np.zeros(n + m, dtype=float)
+        dxy[-(m-zero):] += y_mask * dy_err
+        dxy[-m:] += matrix @ ddua_res
+        dxy[:n] -= matrix.T @ (s_mask * ds_err)
+        dxy += dgap * pridua
+
+        return dxy
+
+    return sp.sparse.linalg.LinearOperator(
+        shape=(n + 2 * m - zero + 1, n+m),
+        matvec = _matvec,
+        rmatvec = _rmatvec)
+
+if __name__ == '__main__': # pragma: no cover
+
+    from scipy.optimize import check_grad
+
+    # create consts
+    np.random.seed(0)
+    m = 20
+    n = 10
+    zero = 5
+    nonneg = m-zero
+    matrix = np.random.randn(m, n)
+    b = np.random.randn(m)
+    c = np.random.randn(n)
+
+    def my_residual(xz):
+        return residual(xz, m, n, zero, nonneg, matrix, b, c)
+
+    def my_Dresidual(xz):
+        return Dresidual(xz, m, n, zero, nonneg, matrix, b, c)
+
+    def my_Dresidual_densefull(xz):
+        return Dresidual_densefull(xz, m, n, zero, nonneg, matrix, b, c)
+
+    def my_Dresidual_dense(xz):
+        return _densify_also_nonsquare(my_Dresidual(xz))
+
+    print('\nCHECKING D_RESIDUAL DENSE')
+    for i in range(10):
+        print(check_grad(
+            my_residual, my_Dresidual_densefull, np.random.randn(n+m)))
+
+    # print('\nCHECKING D_RESIDUAL')
+    # for i in range(10):
+    #     print(check_grad(my_residual, my_Dresidual_dense, np.random.randn(n+m)))
+
+    # print('\nCHECKING DR and DR^T CONSISTENT')
+    # for i in range(10):
+    #     xy = np.random.randn(n+m)
+    #     DR = _densify_also_nonsquare(my_Dresidual(xy))
+    #     DRT = _densify_also_nonsquare(my_Dresidual(xy).T)
+    #     assert np.allclose(DR.T, DRT)
+    #     print('\tOK!')
diff --git a/project_euromir/solver_new.py b/project_euromir/solver_new.py
index 76e77db..a87e49c 100644
--- a/project_euromir/solver_new.py
+++ b/project_euromir/solver_new.py
@@ -198,6 +198,7 @@ def _local_derivative_residual(xy):
 
     _start = time.time()
 
     # extra_iters=5
+    # all_losses = []
 
     for newton_iterations in range(1000):
@@ -228,10 +229,46 @@ def _local_derivative_residual(xy):
                 current_loss=loss_xy, current_gradient=grad_xy,
                 direction=direction)
 
+            # all_losses.append(loss_xy)
+
+            # import matplotlib.pyplot as plt
+            # iter_x = xy[:n]
+            # iter_y = xy[n:]
+            # iter_s = -matrix_transf @ iter_x + b_transf
+            # positive_y = iter_y[zero:zero+nonneg] > 0
+            # positive_s = iter_s[zero:zero+nonneg] > 0
+            # both_positive = positive_y & positive_s
+            # myy = np.copy(iter_y[zero:zero+nonneg])
+            # plt.plot(myy, alpha=.3, color='b', marker='*', linestyle=' ')
+            # myy[~both_positive] = np.nan
+
+            # mys = np.copy(iter_s[zero:zero+nonneg])
+            # plt.plot(mys, alpha=.3, color='g', marker='*', linestyle=' ')
+            # mys[~both_positive] = np.nan
+            # plt.plot(myy, color='b', marker='*', linestyle=' ')
+
+            # plt.plot(mys, color='g', marker='*', linestyle=' ')
+            # avgerr = np.sqrt(loss_xy/(m+n))
+            # plt.hlines(avgerr, 0, nonneg, color='r', alpha=.5)
+            # plt.hlines(0, 0, nonneg, color='y', alpha=.5)
+            # to_block_y = (myy < mys) # & (myy < avgerr)
+            # to_block_s = (mys < myy) # & (mys < avgerr)
+            # plt.ylim([0, avgerr])
+            # plt.show()
+
+            # plt.plot(iter_y[zero:zero+nonneg])
+            # plt.plot(iter_s[zero:zero+nonneg])
+            # plt.show()
+            # breakpoint()
+
     else:
         raise FloatingPointError(
             f'Solver did not converge in {newton_iterations} iterations.')
 
+    # import matplotlib.pyplot as plt
+    # plt.semilogy(all_losses)
+    # plt.show()
+
     if loss_xy > np.finfo(float).eps:
         raise NotImplementedError(
             'Loss at convergence is not small enough. '
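
The new SOC branches in project and dual_project call second_order_project, whose implementation is not part of this diff. As a point of reference, a minimal sketch of the standard Euclidean projection onto the second-order cone, assuming that is the operation second_order_project performs on each cone block (the sign flips are handled by the callers):

import numpy as np

def soc_project_sketch(z):
    """Sketch: project z = (t, x) onto the cone {(t, x) : ||x||_2 <= t}.

    This is an illustrative stand-in, not the repository's
    second_order_project.
    """
    t, x = z[0], z[1:]
    norm_x = np.linalg.norm(x)
    if norm_x <= t:
        # already inside the cone
        return np.copy(z)
    if norm_x <= -t:
        # inside the polar cone: projection is the origin
        return np.zeros_like(z)
    # otherwise project onto the cone's boundary
    scale = (t + norm_x) / (2. * norm_x)
    result = np.empty_like(z)
    result[0] = scale * norm_x
    result[1:] = scale * x
    return result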
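
The new refinement_no_hsde.py module only defines the residual and its Jacobian; the refinement loop that consumes them is not in this diff. A minimal sketch of one way they could be driven, assuming a plain Gauss-Newton iteration on the dense Jacobian and reusing the problem data from the module's __main__ test block (illustrative only, not the project's refinement routine):

import numpy as np

from project_euromir.refinement_no_hsde import Dresidual_densefull, residual

# same random problem data as in the __main__ test block
np.random.seed(0)
m, n, zero = 20, 10, 5
nonneg = m - zero
matrix = np.random.randn(m, n)
b = np.random.randn(m)
c = np.random.randn(n)

xz = np.random.randn(n + m)
for _ in range(20):
    res = residual(xz, m, n, zero, nonneg, matrix, b, c)
    jac = Dresidual_densefull(xz, m, n, zero, nonneg, matrix, b, c)
    # Gauss-Newton step: least-squares solution of jac @ dxz ~= -res
    dxz, *_ = np.linalg.lstsq(jac, -res, rcond=None)
    xz = xz + dxz
    print(np.linalg.norm(residual(xz, m, n, zero, nonneg, matrix, b, c)))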