playing with linear solvers;
seems that LSMR on the Levenberg-Marquardt
system is a bit faster than CG Newton on the
squared system. Sadly, I have not yet been able
to warm-start the linear solve reliably; perhaps
the line search interacts with that when we take very small
steps.
It is very tricky to do anything more sophisticated than a
backtracking line search (which, however, needs to be optimized
to avoid many unnecessary evaluations). What I want to try next
is to solve a Woodbury simplification of the system, which
is also tricky because the system is not full rank.
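
As a rough illustration of the comparison above (not code from this commit), the sketch below solves the rectangular Levenberg-Marquardt system with LSMR and the squared (normal-equations) system with CG, using random stand-ins for the residual Jacobian and residual. Both produce essentially the same direction; the rectangular formulation avoids squaring the condition number.

.. code-block:: python

    # Illustration only: random stand-ins, not the solver's actual data.
    import numpy as np
    import scipy.sparse.linalg as spla

    rng = np.random.default_rng(0)
    J = rng.standard_normal((300, 100))   # stand-in Jacobian of the residual
    r = rng.standard_normal(300)          # stand-in residual

    # LSMR on the rectangular system J d ~= -r (the Levenberg-Marquardt system);
    # lsmr also accepts x0= for warm starts, which the message above says is
    # not yet reliable here.
    d_lsmr = spla.lsmr(J, -r, atol=1e-12, btol=1e-12)[0]

    # CG on the squared system J^T J d = -J^T r, with J^T J as a LinearOperator
    # so the squared matrix is never formed explicitly.
    JTJ = spla.LinearOperator((100, 100), matvec=lambda v: J.T @ (J @ v))
    d_cg, info = spla.cg(JTJ, -J.T @ r)

    # The two directions agree up to the solvers' tolerances.
    print(np.linalg.norm(d_lsmr - d_cg) / np.linalg.norm(d_lsmr))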
enzbus committed Jul 5, 2024
1 parent a221949 commit 08c2ec3
Showing 5 changed files with 335 additions and 34 deletions.
40 changes: 34 additions & 6 deletions README.rst
@@ -161,7 +161,7 @@ three platforms. Then:

.. code-block:: bash

    pip install -U https://github.com/enzbus/project_euromir
    pip install --upgrade --force-reinstall git+https://github.com/enzbus/project_euromir
Usage
@@ -172,23 +172,51 @@ single C function the user interacts with will be also documented, for usage
from other runtime environments. In fact, our preview interface already works,
and that's what we're using in our testsuite. If you installed as described
above you can already test the solver on linear programs of moderate size,
we're testing so far up to a 2 or 3 hundreds variables. From our tests you
we're testing so far up to a few hundred variables. From our tests you
should already observe higher numerical accuracy on the constraints,
which are smaller than or close to the machine precision of double arithmetic (2.2e-16),
and/or lower objective value on the solution, than with any other numerical solver
and/or lower objective value on the solution, than with any other numerical solver.

For example, on this :math:`\ell_1` regression linear program you can see better
constraint satisfaction than with a state-of-the-art interior point solver. If
you compare with a solver that has exact theoretical satisfaction of the
optimality conditions, like ``GLPK`` for linear programs, our prototype shows
about the same numerical error on the constraints, but a better objective value
at optimality. Keep in mind that our solver is **factorization free**: it uses
only iterative linear algebra methods, with linear algorithmic complexity and
linear memory usage. As such, it is fully parallelizable without loss of
accuracy. Most of the logic is currently implemented in Python, as we finalize
the algorithmic details, so it will run slower than fully compiled codes.

.. code-block:: python

    import numpy as np
    import cvxpy as cp
    from project_euromir import Solver
    m, n = 100, 50
    m, n = 200, 100
    np.random.seed(0)
    A = np.random.randn(m, n)
    b = np.random.randn(m)
    x = cp.Variable(n)
    objective = cp.Minimize(cp.norm1(A @ x - b))
    constraints = [cp.abs(x) <= .05]
    cp.Problem(objective, constraints).solve(solver=Solver())
    print('Project EuroMir')
    print(
        'constraints violation infinity norm: %.2e' %
        np.max(np.abs(constraints[0].violation())))
    print('Objective value: %.16e' % objective.value)
    cp.Problem(objective, constraints).solve(
        solver='CLARABEL', max_iter=1000, tol_gap_abs=1e-64, tol_gap_rel=1e-64,
        tol_feas=1e-64, tol_infeas_abs=1e-64, tol_infeas_rel=1e-64,
        tol_ktratio=1e-64)
    print('State-of-the-art interior point solver, maxing out accuracy:')
    print(
        'constraints violation infinity norm: %.2e' %
        np.max(np.abs(constraints[0].violation())))
    print('Objective value: %.16e' % objective.value)
64 changes: 64 additions & 0 deletions project_euromir/direction_calculator.py
@@ -217,3 +217,67 @@ def get_direction(
            current_point=current_point, current_gradient=current_gradient)
        self._x0 = direction
        return direction

class LSQRLevenbergMarquardt(DirectionCalculator):
    """Calculate descent direction using LSQR-LM."""

    _x0 = None  # overwritten in derived class(es)

    def __init__(
            self, residual_function, derivative_residual_function,
            warm_start=False):
        """Initialize with functions to calculate residual and derivative.

        :param residual_function: Function returning the residual at a point.
        :type residual_function: callable
        :param derivative_residual_function: Function returning the derivative
            of the residual, as a linear operator, at a point.
        :type derivative_residual_function: callable
        :param warm_start: Re-use the previous solution as starting point of
            the next linear solve.
        :type warm_start: bool
        """
        self._residual_function = residual_function
        self._derivative_residual_function = derivative_residual_function
        self._warm_start = warm_start

    def _inner_function(self, derivative_residual, residual, current_gradient):
        """In order to replace with LSMR below."""
        result = sp.sparse.linalg.lsqr(
            derivative_residual, -residual, x0=self._x0, calc_var=False,
            atol=min(0.5, np.linalg.norm(current_gradient)), btol=0.)
        return result[0], result[2]  # solution, number of iterations

    def get_direction(
            self, current_point: np.array,
            current_gradient: np.array) -> np.array:
        """Calculate descent direction at current point.

        :param current_point: Current point.
        :type current_point: np.array
        :param current_gradient: Current gradient.
        :type current_gradient: np.array

        :returns: Descent direction.
        :rtype: np.array
        """
        residual = self._residual_function(current_point)
        derivative_residual = self._derivative_residual_function(current_point)
        solution, n_iters = self._inner_function(
            derivative_residual, residual, current_gradient)
        logger.info(
            '%s calculated direction with error norm %.2e, while the input'
            ' gradient had norm %.2e, in %d iterations',
            self.__class__.__name__,
            np.linalg.norm(derivative_residual @ solution + residual),
            np.linalg.norm(current_gradient), n_iters)
        if self._warm_start:
            # LSQR fails with warm-start, somehow gets stuck
            # on directions of very small norm but not good descent
            self._x0 = solution
        return solution


class LSMRLevenbergMarquardt(LSQRLevenbergMarquardt):
    """Calculate descent direction using LSMR-LM."""

    def _inner_function(self, derivative_residual, residual, current_gradient):
        """Just the call to the iterative solver."""
        result = sp.sparse.linalg.lsmr(
            derivative_residual, -residual, x0=self._x0,
            atol=min(0.5, np.linalg.norm(current_gradient)), btol=0.)
        return result[0], result[2]  # solution, number of iterations
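
As a rough usage sketch (not part of this commit), the new direction calculators could be wired to the ``residual`` and ``Dresidual`` functions added to ``loss_no_hsde.py`` below; the problem data here are random stand-ins, and the import paths simply follow the file layout shown in this diff.

.. code-block:: python

    # Hypothetical wiring, for illustration only.
    import functools

    import numpy as np

    from project_euromir.direction_calculator import LSMRLevenbergMarquardt
    from project_euromir.loss_no_hsde import Dresidual, residual

    # random stand-in problem data: `zero` rows are the zero (equality) cone,
    # the remaining m - zero rows are the nonnegative cone
    m, n, zero = 20, 10, 5
    np.random.seed(0)
    matrix = np.random.randn(m, n)
    b = np.random.randn(m)
    c = np.random.randn(n)

    direction_calculator = LSMRLevenbergMarquardt(
        residual_function=functools.partial(
            residual, m=m, n=n, zero=zero, matrix=matrix, b=b, c=c),
        derivative_residual_function=functools.partial(
            Dresidual, m=m, n=n, zero=zero, matrix=matrix, b=b, c=c))

    xy = np.zeros(n + m)  # current primal-dual iterate
    # gradient of the squared-residual loss, 2 * DR^T r, as checked in the
    # consistency tests below
    gradient = 2. * (Dresidual(xy, m, n, zero, matrix, b, c).T
                     @ residual(xy, m, n, zero, matrix, b, c))
    direction = direction_calculator.get_direction(
        current_point=xy, current_gradient=gradient)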
146 changes: 137 additions & 9 deletions project_euromir/loss_no_hsde.py
@@ -94,8 +94,6 @@ def loss_gradient(xy, m, n, zero, matrix, b, c, workspace):
def hessian(xy, m, n, zero, matrix, b, c, workspace, regularizer = 0.):
    """Hessian to use inside LBFGS loop."""

    locals().update(workspace)

    x = xy[:n]
    y = xy[n:]

@@ -141,16 +139,103 @@ def _matvec(dxdy):
        matvec=_matvec
    )

def _densify(linear_operator):
    assert linear_operator.shape[0] == linear_operator.shape[1]
def residual(xy, m, n, zero, matrix, b, c):
    """Residual function to use L-M approach instead."""

    x = xy[:n]
    y = xy[n:]

    # zero cone dual variable is unconstrained
    y_error = np.minimum(y[zero:], 0.)

    # this must be all zeros
    dual_residual = matrix.T @ y + c

    # slacks
    s = -matrix @ x + b

    # slacks for zero cone must be zero
    s_error = np.copy(s)
    s_error[zero:] = np.minimum(s[zero:], 0.)

    # duality gap
    gap = c.T @ x + b.T @ y

    # build the full residual by concatenating residuals
    res = np.empty(n + 2 * m - zero + 1, dtype=float)
    res[:m-zero] = y_error
    res[m-zero:m+n-zero] = dual_residual
    res[-1-m:-1] = s_error
    res[-1] = gap

    return res

def Dresidual(xy, m, n, zero, matrix, b, c):
    """Linear operator to matrix multiply the residual derivative."""

    x = xy[:n]
    y = xy[n:]

    # zero cone dual variable is unconstrained
    y_mask = (y[zero:] <= 0.) * 1.

    # slacks
    s = -matrix @ x + b

    # slacks for zero cone must be zero
    s_mask = np.ones_like(s)
    s_mask[zero:] = s[zero:] <= 0.

    # concatenation of primal and dual costs
    pridua = np.concatenate([c, b])

    def _matvec(dxy):

        # decompose direction
        dx = dxy[:n]
        dy = dxy[n:]

        # compose result
        dr = np.empty(n + 2 * m - zero + 1, dtype=float)
        dr[:m-zero] = y_mask * dy[zero:]
        dr[m-zero:m+n-zero] = matrix.T @ dy
        dr[-1-m:-1] = s_mask * (-(matrix @ dx))
        dr[-1] = pridua @ dxy

        return dr

    def _rmatvec(dr):

        # decompose direction
        dy_err = dr[:m-zero]
        ddua_res = dr[m-zero:m+n-zero]
        ds_err = dr[-1-m:-1]
        dgap = dr[-1]

        # compose result
        dxy = np.zeros(n + m, dtype=float)
        dxy[-(m-zero):] += y_mask * dy_err
        dxy[-m:] += matrix @ ddua_res
        dxy[:n] -= matrix.T @ (s_mask * ds_err)
        dxy += dgap * pridua

        return dxy

    return sp.sparse.linalg.LinearOperator(
        shape=(n + 2 * m - zero + 1, n+m),
        matvec=_matvec,
        rmatvec=_rmatvec)

def _densify_also_nonsquare(linear_operator):
    result = np.empty(linear_operator.shape)
    identity = np.eye(result.shape[0])
    for i in range(len(identity)):
        result[:, i] = linear_operator.matvec(identity[:, i])
    for j in range(linear_operator.shape[1]):
        e_j = np.zeros(linear_operator.shape[1])
        e_j[j] = 1.
        result[:, j] = linear_operator.matvec(e_j)
    return result


if __name__ == '__main__':
if __name__ == '__main__': # pragma: no cover

    from scipy.optimize import check_grad

@@ -173,7 +258,20 @@ def my_grad(xy):
        return np.copy(loss_gradient(xy, m, n, zero, matrix, b, c, wks)[1])

    def my_hessian(xy):
        return _densify(hessian(xy, m, n, zero, matrix, b, c, wks))
        return _densify_also_nonsquare(
            hessian(xy, m, n, zero, matrix, b, c, wks))

    def my_residual(xy):
        return residual(xy, m, n, zero, matrix, b, c)

    def my_Dresidual(xy):
        return Dresidual(xy, m, n, zero, matrix, b, c)

    def my_hessian_from_dresidual(xy):
        DR = Dresidual(xy, m, n, zero, matrix, b, c)
        return sp.sparse.linalg.LinearOperator(
            (n+m, n+m),
            matvec=lambda dxy: DR.T @ (DR @ (dxy * 2.)))

    print('\nCHECKING GRADIENT')
    for i in range(10):
@@ -182,3 +280,33 @@ def my_hessian(xy):
    print('\nCHECKING HESSIAN')
    for i in range(10):
        print(check_grad(my_grad, my_hessian, np.random.randn(n+m)))

    print('\nCHECKING LOSS AND RESIDUAL CONSISTENT')
    for i in range(10):
        xy = np.random.randn(n+m)
        assert np.isclose(my_loss(xy), np.linalg.norm(my_residual(xy))**2)
    print('\tOK!')

    print('\nCHECKING DR and DR^T CONSISTENT')
    for i in range(10):
        xy = np.random.randn(n+m)
        DR = _densify_also_nonsquare(my_Dresidual(xy))
        DRT = _densify_also_nonsquare(my_Dresidual(xy).T)
        assert np.allclose(DR.T, DRT)
    print('\tOK!')

    print('\nCHECKING (D)RESIDUAL AND GRADIENT CONSISTENT')
    for i in range(10):
        xy = np.random.randn(n+m)
        grad = my_grad(xy)
        newgrad = 2 * (my_Dresidual(xy).T @ my_residual(xy))
        assert np.allclose(grad, newgrad)
    print('\tOK!')

    print('\nCHECKING DRESIDUAL AND HESSIAN CONSISTENT')
    for i in range(10):
        xy = np.random.randn(n+m)
        hess = my_hessian(xy)
        hess_rebuilt = _densify_also_nonsquare(my_hessian_from_dresidual(xy))
        assert np.allclose(hess, hess_rebuilt)
    print('\tOK!')