trying ncg with new formulation
enzbus committed Jun 27, 2024
1 parent 4e49d60 commit e5ad081
Showing 7 changed files with 296 additions and 51 deletions.
19 changes: 13 additions & 6 deletions project_euromir/cvxpy_solver.py
@@ -52,13 +52,20 @@
    raise ImportError(
        "Can't use CVXPY interface if CVXPY is not installed!") from exc

# original
from project_euromir import solve
# without hsde
from project_euromir.solver_nohsde import solve
# original, with hsde
# from project_euromir import solve

# newton cg
# from project_euromir.solver_cg import solve
BFGS = False
NEWTON_CG = not BFGS

if BFGS:
    # without hsde
    from project_euromir.solver_nohsde import solve

if NEWTON_CG:

    # newton cg
    from project_euromir.solver_cg import solve


class Solver(ConicSolver):
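A quick usage sketch of the toggled interface (a hypothetical example, not from the repository: it assumes CVXPY's support for passing a custom ConicSolver instance to solve(), and uses an arbitrary toy LP):

import cvxpy as cp

from project_euromir.cvxpy_solver import Solver

x = cp.Variable(5)
problem = cp.Problem(cp.Minimize(cp.norm1(x)), [x >= -1., cp.sum(x) == 2.])
problem.solve(solver=Solver())  # with NEWTON_CG set, dispatches to solver_cg.solve
print(x.value)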
2 changes: 1 addition & 1 deletion project_euromir/lbfgs.py
@@ -51,7 +51,7 @@
'g': np.array([-1.]),
'ftol': np.array([1e-3]),
'gtol': np.array([0.9]),
'xtol': np.array([1e-5]), # TODO: figure out if this depends on scale
'xtol': np.array([1e-8]), # TODO: figure out if this depends on scale
'stpmin': np.array([0.]),
'stpmax': np.array([1000.]), # in lbfgsb this is set iteratively...
'isave': np.zeros(20, dtype=np.int32),
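(Context, as a hedged note: these arrays are the state passed to the MINPACK-2 dcsrch line search, whose xtol bounds the relative width of the bracketing interval before the search exits; 1e-8 therefore asks for a nearly exact line search. The TODO about scale dependence still stands.)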
186 changes: 186 additions & 0 deletions project_euromir/loss_no_hsde.py
@@ -0,0 +1,186 @@
# BSD 3-Clause License

# Copyright (c) 2024-, Enzo Busseti

# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.

# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.

# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.

# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Define loss function(s), gradient(s), ...."""

import numpy as np
import scipy as sp

HSDE_SCALING_PENALTY = False

###
# The loss function used in the main loop
###
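# In symbols (a reading of the code below, not the author's notation): with
# s = b - A x and rows split between the zero cone and the nonnegative cone,
#
#   L(x, y) = || A^T y + c ||^2                              (dual residual)
#           + || s_zero ||^2 + || min(s_nonneg, 0) ||^2      (primal cone error)
#           + || min(y_nonneg, 0) ||^2                       (dual cone error)
#           + ( c^T x + b^T y )^2                            (squared duality gap)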

def create_workspace(m, n, zero):
    workspace = {}

    # preallocate some variables
    workspace['y_error'] = np.empty(m-zero, dtype=float)
    workspace['s_error'] = np.empty(m, dtype=float)
    workspace['dual_residual'] = np.empty(n, dtype=float)
    workspace['s'] = np.empty(m, dtype=float)
    workspace['gradient'] = np.empty(n+m, dtype=float)

    return workspace

# variable is xy
def loss_gradient(xy, m, n, zero, matrix, b, c, workspace):
    """Loss and gradient, used in the main solver loop and in the line search."""

    x = xy[:n]
    y = xy[n:]

    # zero cone dual variable is unconstrained
    workspace['y_error'][:] = np.minimum(y[zero:], 0.)

    # this must be all zeros
    workspace['dual_residual'][:] = matrix.T @ y + c

    # slacks
    workspace['s'][:] = -matrix @ x + b

    # slacks for zero cone must be zero
    workspace['s_error'][:zero] = workspace['s'][:zero]
    workspace['s_error'][zero:] = np.minimum(workspace['s'][zero:], 0.)

    # duality gap
    gap = c.T @ x + b.T @ y

    # loss
    loss = np.linalg.norm(workspace['y_error'])**2
    loss += np.linalg.norm(workspace['dual_residual'])**2
    loss += np.linalg.norm(workspace['s_error'])**2
    loss += gap**2
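    # note: the loss is zero exactly at a primal-dual optimal pair (zero
    # dual residual, slacks and duals in their cones, zero duality gap),
    # so driving it to zero solves the LP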

    # dual residual sqnorm
    workspace['gradient'][n:] = 2 * (matrix @ workspace['dual_residual'])

    # s_error sqnorm
    workspace['gradient'][:n] = -2 * (matrix.T @ workspace['s_error'])

    # y_error sqnorm
    workspace['gradient'][n+zero:] += 2 * workspace['y_error']

    # gap sq
    workspace['gradient'][:n] += (2 * gap) * c
    workspace['gradient'][n:] += (2 * gap) * b

    return loss, workspace['gradient']

def hessian(xy, m, n, zero, matrix, b, c, workspace):
    """Hessian, as a scipy LinearOperator, for the Newton-CG loop."""

    x = xy[:n]
    y = xy[n:]

    # zero cone dual variable is unconstrained
    workspace['y_error'][:] = np.minimum(y[zero:], 0.)

    # this must be all zeros
    workspace['dual_residual'][:] = matrix.T @ y + c

    # slacks
    workspace['s'][:] = -matrix @ x + b

    # slacks for zero cone must be zero
    workspace['s_error'][:zero] = workspace['s'][:zero]
    workspace['s_error'][zero:] = np.minimum(workspace['s'][zero:], 0.)
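    # hedged note: the residuals are piecewise linear, so the loss is
    # piecewise quadratic and its almost-everywhere Hessian is the
    # Gauss-Newton form 2 * J^T J; the masks in _matvec below zero out
    # the rows of J at which a min(., 0.) term is inactive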

    def _matvec(dxdy):
        result = np.empty_like(dxdy)
        dx = dxdy[:n]
        dy = dxdy[n:]

        # dual residual sqnorm
        result[n:] = 2 * (matrix @ (matrix.T @ dy))

        # s_error sqnorm
        s_mask = np.ones(m, dtype=float)
        s_mask[zero:] = workspace['s_error'][zero:] < 0.
        result[:n] = 2 * (matrix.T @ (s_mask * (matrix @ dx)))

        # y_error sqnorm
        y_mask = np.ones(m-zero, dtype=float)
        y_mask[:] = workspace['y_error'] < 0.
        result[n+zero:] += 2 * y_mask * dy[zero:]

        # gap
        constants = np.concatenate([c, b])
        result[:] += constants * (2 * (constants @ dxdy))

        return result

    return sp.sparse.linalg.LinearOperator(
        shape=(len(xy), len(xy)),
        matvec=_matvec
    )

def _densify(linear_operator):
    assert linear_operator.shape[0] == linear_operator.shape[1]
    result = np.empty(linear_operator.shape)
    identity = np.eye(result.shape[0])
    for i in range(len(identity)):
        result[:, i] = linear_operator.matvec(identity[:, i])
    return result


if __name__ == '__main__':

    from scipy.optimize import check_grad

    # create consts
    np.random.seed(0)
    m = 20
    n = 10
    zero = 5
    nonneg = 15
    matrix = np.random.randn(m, n)
    b = np.random.randn(m)
    c = np.random.randn(n)

    wks = create_workspace(m, n, zero)

    def my_loss(xy):
        return loss_gradient(xy, m, n, zero, matrix, b, c, wks)[0]

    def my_grad(xy):
        return np.copy(loss_gradient(xy, m, n, zero, matrix, b, c, wks)[1])

    def my_hessian(xy):
        return _densify(hessian(xy, m, n, zero, matrix, b, c, wks))

    print('\nCHECKING GRADIENT')
    for i in range(10):
        print(check_grad(my_loss, my_grad, np.random.randn(n+m)))

    print('\nCHECKING HESSIAN')
    for i in range(10):
        print(check_grad(my_grad, my_hessian, np.random.randn(n+m)))
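Since the new loss/gradient/Hessian triple is self-contained, it can also be driven end-to-end by SciPy's stock Newton-CG as a sanity check. A sketch under the same toy sizes as the __main__ block above (the solver itself uses the modified newton_cg.py instead, and a random instance need not reach loss zero):

import numpy as np
from scipy.optimize import minimize

from project_euromir.loss_no_hsde import create_workspace, hessian, loss_gradient

np.random.seed(0)
m, n, zero = 20, 10, 5
matrix = np.random.randn(m, n)
b = np.random.randn(m)
c = np.random.randn(n)
wks = create_workspace(m, n, zero)

result = minimize(
    lambda xy: loss_gradient(xy, m, n, zero, matrix, b, c, wks)[0],
    x0=np.zeros(n + m),
    # copy: loss_gradient returns a reference to the workspace buffer
    jac=lambda xy: np.copy(loss_gradient(xy, m, n, zero, matrix, b, c, wks)[1]),
    # rebuild the LinearOperator each call; wasteful but simple
    hessp=lambda xy, p: hessian(xy, m, n, zero, matrix, b, c, wks).matvec(p),
    method='Newton-CG', options={'maxiter': 1000})
print(result.fun)  # near zero only if this random instance is solvable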
6 changes: 3 additions & 3 deletions project_euromir/newton_cg.py
@@ -352,7 +352,7 @@ def terminate(warnflag, msg):

        for k2 in range(cg_maxiter):
            if np.add.reduce(np.abs(ri)) <= termcond:
                print(f'iter {k}, breaking CG loop at cgiter {k2} with termcond {termcond:.2e}')
                # print(f'iter {k}, breaking CG loop at cgiter {k2} with termcond {termcond:.2e}')
                # breakpoint()
                break
            if fhess is None:
@@ -369,7 +369,7 @@
            Ap = asarray(Ap).squeeze()  # get rid of matrices...
            curv = np.dot(psupi, Ap)
            if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * float64eps:
                print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}')
                # print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}')
                break
            elif curv < 0:
                if (i > 0):
@@ -414,7 +414,7 @@
            return terminate(5, "")
        update_l1norm = np.linalg.norm(update, ord=1)

        print(f'update_l1norm, {update_l1norm:.2e}')
        # print(f'update_l1norm, {update_l1norm:.2e}')

    else:
        if np.isnan(old_fval) or np.isnan(update_l1norm):
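For reference, the silenced prints traced the two early exits of the inner truncated-CG iteration: a small l1 residual against termcond, and vanishing or negative curvature along the search direction. A minimal standalone sketch of such a loop (not the project's code, which also handles the fhess/fhess_p variants and the modified curvature threshold above):

import numpy as np

def truncated_cg(hess_mv, grad, termcond, cg_maxiter):
    """Approximately solve H p = -grad, with H given implicitly by hess_mv."""
    p = np.zeros_like(grad)
    r = grad.copy()  # residual of H p + grad; equals grad at p = 0
    d = -r  # first search direction: steepest descent
    for _ in range(cg_maxiter):
        if np.abs(r).sum() <= termcond:
            break  # residual small enough, accept p
        Hd = hess_mv(d)
        curv = d @ Hd
        if curv <= 0.:  # zero or negative curvature: stop
            if not p.any():
                p = -grad  # nothing accumulated yet, take a gradient step
            break
        alpha = (r @ r) / curv
        p = p + alpha * d
        r_new = r + alpha * Hd
        d = -r_new + ((r_new @ r_new) / (r @ r)) * d
        r = r_new
    return p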
79 changes: 57 additions & 22 deletions project_euromir/solver_cg.py
@@ -33,11 +33,18 @@
import numpy as np
import scipy as sp

NOHSDE = True

from project_euromir import equilibrate
from project_euromir.lbfgs import minimize_lbfgs
from project_euromir.loss import (common_computation_main,
                                  create_workspace_main, gradient, hessian,
                                  loss)

if not NOHSDE:
    from project_euromir.loss import (common_computation_main,
                                      create_workspace_main, gradient, hessian,
                                      loss)
else:
    from project_euromir.loss_no_hsde import (create_workspace, loss_gradient, hessian)

from project_euromir.newton_cg import _epsilon, _minimize_newtoncg
from project_euromir.refinement import refine

@@ -69,30 +76,51 @@ def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10):
        [-c_transf.reshape(1, n), -b_transf.reshape(1, m), None],
        ]).tocsc()
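    # a hedged reading of the assembly above: Q is the skew-symmetric
    # HSDE embedding matrix
    #     [[  0    A^T  c ]
    #      [ -A     0   b ]
    #      [-c^T  -b^T  0 ]]
    # built in both paths, since v = Q @ u is still computed at the end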

    # prepare workspace
    workspace = create_workspace_main(Q, n, zero, nonneg)
    if not NOHSDE:

        # prepare workspace
        workspace = create_workspace_main(Q, n, zero, nonneg)

        # these functions should be unpacked inside newton-cg
        def separated_loss(u):
            common_computation_main(u, Q, n, zero, nonneg, workspace)
            return loss(u, Q, n, zero, nonneg, workspace)

        def separated_grad(u):
            common_computation_main(u, Q, n, zero, nonneg, workspace)
            return np.copy(gradient(u, Q, n, zero, nonneg, workspace))

    # these functions should be unpacked inside newton-cg
    def separated_loss(u):
        common_computation_main(u, Q, n, zero, nonneg, workspace)
        return loss(u, Q, n, zero, nonneg, workspace)
        def separated_hessian(u):
            common_computation_main(u, Q, n, zero, nonneg, workspace)
            return hessian(u, Q, n, zero, nonneg, workspace)

    def separated_grad(u):
        common_computation_main(u, Q, n, zero, nonneg, workspace)
        return np.copy(gradient(u, Q, n, zero, nonneg, workspace))
        x_0 = np.zeros(n+m+1)
        x_0[-1] = 1.

    def separated_hessian(u):
        common_computation_main(u, Q, n, zero, nonneg, workspace)
        return hessian(u, Q, n, zero, nonneg, workspace)
    else:

    x_0 = np.zeros(n+m+1)
    x_0[-1] = 1.
        workspace = create_workspace(m, n, zero)

        # these functions should be unpacked inside newton-cg
        def separated_loss(xy):
            return loss_gradient(
                xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)[0]

        def separated_grad(xy):
            return np.copy(loss_gradient(
                xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)[1])

        def separated_hessian(xy):
            return hessian(
                xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)

        x_0 = np.zeros(n+m)
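        # note: in this no-HSDE path the variable is plain (x, y) in
        # R^{n+m}, with no homogeneous tau/kappa components (the HSDE
        # branch above instead starts with tau = x_0[-1] = 1)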

    old_x = np.empty_like(x_0)

    def callback(current):
        print('current loss', current.fun)
        print('current kappa', current.x[-1])
        # print('current kappa', current.x[-1])
        # if current.fun < np.finfo(float).eps**2:
        #     raise StopIteration
        # if current.x[-1] < 1e-2:
Expand All @@ -110,20 +138,27 @@ def callback(current):
        jac=separated_grad,
        hess=separated_hessian,
        hessp=None,
        callback=callback,
        # callback=callback,
        xtol=0., #1e-5,
        eps=_epsilon, # unused
        maxiter=10000,
        # max_cg_iters=100,
        disp=1,
        return_all=False,
        c1=1e-4, c2=0.9)
    breakpoint()
    # breakpoint()
    print(f'NEWTON-CG took {time.time() - start:.3f} seconds')

    if not NOHSDE:
        # extract result
        u = result['x']
        assert u[-1] > 0, 'Certificates not yet implemented'
    u = result['x']
    assert u[-1] > 0, 'Certificates not yet implemented'
    else:
        xy = result['x']
        u = np.empty(n+m+1, dtype=float)
        u[:-1] = xy
        u[-1] = 1.

    # u /= u[-1]
    v = Q @ u
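    # reading off the blocks of Q assembled above: with u = (x, y, 1),
    #     v = Q @ u = (A^T y + c,  b - A x,  -(c^T x + b^T y)),
    # i.e. the dual residual, the primal slacks, and minus the duality gap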

