experiments, refactoring refinement

enzbus · Sep 12, 2024 · cdd8e84 · cdd8e84
1 parent 6b5784a
commit cdd8e84
Show file tree

Hide file tree

Showing 3 changed files with 242 additions and 17 deletions.
diff --git a/project_euromir/loss_no_hsde.py b/project_euromir/loss_no_hsde.py
@@ -37,13 +37,22 @@ def create_workspace(m, n, zero, soc=()):
 
     return workspace
 
-def project(vec, result, zero, nonneg, soc):
+def project(vec, result, nonneg, soc):
     """Project. We skip zero cone.
 
     This is -Pi_K(-vec), not sure if minus outside is needed.
     """
     result[:nonneg] = np.minimum(vec[:nonneg], 0.)
 
+    # SOC
+    soc_cursor = 0
+    for soc_cone in soc:
+        second_order_project(
+            z=-vec[nonneg+soc_cursor:nonneg+soc_cursor+soc_cone],
+            result=result[nonneg+soc_cursor:nonneg+soc_cursor+soc_cone])
+        soc_cursor += soc_cone
+    result[nonneg:nonneg+soc_cursor] *= -1.
+
 def dual_project(vec, result, zero, nonneg, soc):
     """Project on dual cone.
 
@@ -52,7 +61,16 @@ def dual_project(vec, result, zero, nonneg, soc):
     result[:zero] = vec[:zero]
     result[zero:zero+nonneg] = np.minimum(vec[zero:zero+nonneg], 0.)
 
-def make_derivative_project(vec, result, zero, nonneg, soc):
+    # SOC
+    soc_cursor = 0
+    for soc_cone in soc:
+        second_order_project(
+            z=-vec[zero+nonneg+soc_cursor:zero+nonneg+soc_cursor+soc_cone],
+            result=result[zero+nonneg+soc_cursor:zero+nonneg+soc_cursor+soc_cone])
+        soc_cursor += soc_cone
+    result[zero+nonneg:zero+nonneg+soc_cursor] *= -1.
+
+def make_derivative_project(vec, result, nonneg, soc):
     """Make DPi_K. We skip zero cone.
     """
     y_mask = np.ones(nonneg, dtype=float)
@@ -84,19 +102,7 @@ def loss_gradient(xy, m, n, zero, nonneg, matrix, b, c, workspace, soc=()):
 
     # zero cone dual variable is unconstrained
     # workspace['y_error'][:nonneg] = np.minimum(y[zero:zero+nonneg], 0.)
-    project(vec=y[zero:], result=workspace['y_error'], zero=zero, nonneg=nonneg, soc=soc)
-    # dual_cone_project(
-    #     z_cone=-y[zero:],
-    #     result=workspace['y_error'],
-    #     dimensions=(None, 0, nonneg, soc))
-    # workspace['y_error'] = -workspace['y_error']
-    # soc_cursor = 0
-    # for soc_cone in soc:
-    #     second_order_project(
-    #         z=-y[zero+nonneg+soc_cursor:zero+nonneg+soc_cursor+soc_cone],
-    #         result=workspace['y_error'][nonneg+soc_cursor:nonneg+soc_cursor+soc_cone])
-    #     workspace['y_error'][nonneg+soc_cursor:nonneg+soc_cursor+soc_cone] *= -1.
-    #     soc_cursor += soc_cone
+    project(vec=y[zero:], result=workspace['y_error'], nonneg=nonneg, soc=soc)
 
     # this must be all zeros
     workspace['dual_residual'][:] = matrix.T @ y + c
@@ -154,7 +160,7 @@ def hessian(
 
     # zero cone dual variable is unconstrained
     # workspace['y_error'][:nonneg] = np.minimum(y[zero:zero+nonneg], 0.)
-    project(vec=y[zero:], result=workspace['y_error'], zero=zero, nonneg=nonneg, soc=soc)
+    project(vec=y[zero:], result=workspace['y_error'], nonneg=nonneg, soc=soc)
 
     # this must be all zeros
     workspace['dual_residual'][:] = matrix.T @ y + c
@@ -181,7 +187,7 @@ def hessian(
         nonneg=nonneg, soc=soc)
 
     dpi_y = make_derivative_project(
-        vec=y, result=workspace['y_error'], zero=zero, nonneg=nonneg, soc=soc)
+        vec=y, result=workspace['y_error'], nonneg=nonneg, soc=soc)
 
     def _matvec(dxdy):
         result = np.empty_like(dxdy)

diff --git a/project_euromir/refinement_no_hsde.py b/project_euromir/refinement_no_hsde.py
@@ -0,0 +1,182 @@
+# Copyright 2024 Enzo Busseti
+#
+# This file is part of Project Euromir.
+#
+# Project Euromir is free software: you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation, either version 3 of the License, or (at your option) any
+# later version.
+#
+# Project Euromir is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+# details.
+#
+# You should have received a copy of the GNU General Public License along with
+# Project Euromir. If not, see <https://www.gnu.org/licenses/>.
+"""Define residual and Dresidual for use by refinement loop."""
+
+import numpy as np
+import scipy as sp
+
+from .loss_no_hsde import _densify_also_nonsquare
+
+
+def residual(xz, m, n, zero, nonneg, matrix, b, c, soc=()):
+    """Residual function for refinement."""
+
+    x = xz[:n]
+    z = xz[n:]
+
+    # projection
+    y = np.empty_like(z)
+    y[:zero] = z[:zero]
+    y[zero:zero+nonneg] = np.maximum(z[zero:zero+nonneg], 0.)
+    s = y - z
+
+    # print(y)
+    # print(s)
+
+    # primal residual
+    primal_residual = matrix @ x - b + s
+
+    # dual residual
+    dual_residual = c + matrix.T @ y
+
+    # duality gap
+    gap = c.T @ x + b.T @ y
+
+    # build the full residual by concatenating residuals
+    res = np.zeros(n + m + 1, dtype=float)
+    res[:m] = primal_residual
+    res[m:m+n] = dual_residual
+    res[-1] = gap
+
+    return res
+
+def Dresidual_densefull(xz, m, n, zero, nonneg, matrix, b, c, soc=()):
+    """Dense Jacobian for testing."""
+
+    jacobian = np.zeros((n+m+1, n+m), dtype=float)
+
+    assert len(soc) == 0
+
+    # Pi derivatives
+    y_mask = np.ones(m, dtype=float)
+    y_mask[zero:zero+nonneg] = xz[n+zero:n+zero+nonneg] >= 0
+    s_mask = y_mask - 1.
+    # print(y_mask)
+    # print(s_mask)
+
+    # pri res
+    jacobian[:m, :n] = matrix
+    jacobian[:m, n:] = np.diag(s_mask)
+
+    # dua res
+    jacobian[m:m+n, n:] = matrix.T @ np.diag(y_mask)
+
+    # gap
+    jacobian[-1, :n] = c
+    jacobian[-1, n:] = y_mask * b
+
+    return jacobian
+
+
+def Dresidual(xy, m, n, zero, nonneg, matrix, b, c, soc=()):
+    """Linear operator to matrix multiply the refinement residual derivative."""
+
+    x = xy[:n]
+    y = xy[n:]
+
+    # zero cone dual variable is unconstrained
+    y_mask = (y[zero:] <= 0.) * 1.
+
+    # slacks
+    s = -matrix @ x + b
+
+    # slacks for zero cone must be zero
+    s_mask = np.ones_like(s)
+    s_mask[zero:] = s[zero:] <= 0.
+
+    # concatenation of primal and dual costs
+    pridua = np.concatenate([c, b])
+
+    def _matvec(dxy):
+
+        # decompose direction
+        dx = dxy[:n]
+        dy = dxy[n:]
+
+        # compose result
+        dr = np.empty(n + 2 * m - zero + 1, dtype=float)
+        dr[:m-zero] = y_mask * dy[zero:]
+        dr[m-zero:m+n-zero] = matrix.T @ dy
+        dr[-1-m:-1] = s_mask * (-(matrix @ dx))
+        dr[-1] = pridua @ dxy
+
+        return dr
+
+    def _rmatvec(dr):
+
+        # decompose direction
+        dy_err = dr[:m-zero]
+        ddua_res = dr[m-zero:m+n-zero]
+        ds_err = dr[-1-m:-1]
+        dgap = dr[-1]
+
+        # compose result
+        dxy = np.zeros(n + m, dtype=float)
+        dxy[-(m-zero):] += y_mask * dy_err
+        dxy[-m:] += matrix @ ddua_res
+        dxy[:n] -= matrix.T @ (s_mask * ds_err)
+        dxy += dgap * pridua
+
+        return dxy
+
+    return sp.sparse.linalg.LinearOperator(
+        shape=(n + 2 * m - zero + 1, n+m),
+        matvec = _matvec,
+        rmatvec = _rmatvec)
+
+if __name__ == '__main__': # pragma: no cover
+
+    from scipy.optimize import check_grad
+
+    # create consts
+    np.random.seed(0)
+    m = 20
+    n = 10
+    zero = 5
+    nonneg = m-zero
+    matrix = np.random.randn(m, n)
+    b = np.random.randn(m)
+    c = np.random.randn(n)
+
+    def my_residual(xz):
+        return residual(xz, m, n, zero, nonneg, matrix, b, c)
+
+    def my_Dresidual(xz):
+        return Dresidual(xz, m, n, zero, nonneg, matrix, b, c)
+
+    def my_Dresidual_densefull(xz):
+        return Dresidual_densefull(xz, m, n, zero, nonneg, matrix, b, c)
+
+    def my_Dresidual_dense(xz):
+        return _densify_also_nonsquare(my_Dresidual(xz))
+
+    print('\nCHECKING D_RESIDUAL DENSE')
+    for i in range(10):
+        print(check_grad(
+            my_residual, my_Dresidual_densefull, np.random.randn(n+m)))
+
+    # print('\nCHECKING D_RESIDUAL')
+    # for i in range(10):
+    #     print(check_grad(my_residual, my_Dresidual_dense, np.random.randn(n+m)))
+
+    # print('\nCHECKING DR and DR^T CONSISTENT')
+    # for i in range(10):
+    #     xy = np.random.randn(n+m)
+    #     DR = _densify_also_nonsquare(my_Dresidual(xy))
+    #     DRT = _densify_also_nonsquare(my_Dresidual(xy).T)
+    #     assert np.allclose(DR.T, DRT)
+    # print('\tOK!')
diff --git a/project_euromir/solver_new.py b/project_euromir/solver_new.py
@@ -198,6 +198,7 @@ def _local_derivative_residual(xy):
 
     _start = time.time()
     # extra_iters=5
+    # all_losses = []
 
     for newton_iterations in range(1000):
 
@@ -228,10 +229,46 @@ def _local_derivative_residual(xy):
             current_loss=loss_xy,
             current_gradient=grad_xy, direction=direction)
 
+        # all_losses.append(loss_xy)
+
+        # import matplotlib.pyplot as plt
+        # iter_x = xy[:n]
+        # iter_y = xy[n:]
+        # iter_s = -matrix_transf @ iter_x + b_transf
+        # positive_y = iter_y[zero:zero+nonneg] > 0
+        # positive_s = iter_s[zero:zero+nonneg] > 0
+        # both_positive = positive_y & positive_s
+        # myy = np.copy(iter_y[zero:zero+nonneg])
+        # plt.plot(myy, alpha=.3, color='b', marker='*', linestyle=' ')
+        # myy[~both_positive] = np.nan
+
+        # mys = np.copy(iter_s[zero:zero+nonneg])
+        # plt.plot(mys, alpha=.3, color='g', marker='*', linestyle=' ')
+        # mys[~both_positive] = np.nan
+        # plt.plot(myy, color='b', marker='*', linestyle=' ')
+
+        # plt.plot(mys, color='g', marker='*', linestyle=' ')
+        # avgerr = np.sqrt(loss_xy/(m+n))
+        # plt.hlines(avgerr, 0, nonneg, color='r', alpha=.5)
+        # plt.hlines(0, 0, nonneg, color='y', alpha=.5)
+        # to_block_y = (myy < mys) # & (myy < avgerr)
+        # to_block_s = (mys < myy) # & (mys < avgerr)
+        # plt.ylim([0, avgerr])
+        # plt.show()
+
+        # plt.plot(iter_y[zero:zero+nonneg])
+        # plt.plot(iter_s[zero:zero+nonneg])
+        # plt.show()
+        # breakpoint()
+
     else:
         raise FloatingPointError(
             f'Solver did not converge in {newton_iterations} iterations.')
 
+    # import matplotlib.pyplot as plt
+    # plt.semilogy(all_losses)
+    # plt.show()
+
     if loss_xy > np.finfo(float).eps:
         raise NotImplementedError(
             'Loss at convergence is not small enough. '