
Commit

some work on preconditioning CG;
enzbus committed Jul 9, 2024
1 parent ad1de86 commit 3e77ab6
Showing 4 changed files with 130 additions and 37 deletions.
53 changes: 53 additions & 0 deletions project_euromir/direction_calculator.py
@@ -58,6 +58,10 @@ def nocedal_wright_termination(_current_point, current_gradient):
class DirectionCalculator:
"""Base class for descent direction calculation."""

@property
def statistics(self):
return self._statistics if hasattr(self, '_statistics') else {}

def get_direction(
self, current_point: np.array, current_gradient: np.array) -> np.array:
"""Calculate descent direction at current point given current gradient.
@@ -133,6 +137,7 @@ class CGNewton(DenseNewton):
"""Calculate descent direction using Newton-CG."""

_x0 = None # overwritten in derived class(es)
_preconditioner = None # overwritten in derived class(es)

def __init__(
self, hessian_function,
@@ -158,6 +163,7 @@ def __init__(
self._max_cg_iters = max_cg_iters
self._regularizer = regularizer
self._minres = minres
self._statistics = {'HESSIAN_MATMULS': 0}
super().__init__(hessian_function=hessian_function)

def get_direction(
@@ -184,13 +190,21 @@ def _counter(_):
shape = current_hessian.shape,
matvec = lambda x: orig_hessian @ x + self._regularizer * x
)
# breakpoint()
# diag = np.diag(self._preconditioner.todense())
# real_diag = np.diag( _densify(current_hessian))
# import matplotlib.pyplot as plt
# plt.plot(diag); plt.plot(real_diag); plt.show()
# breakpoint()
result = getattr(sp.sparse.linalg, 'minres' if self._minres else 'cg')(
A=current_hessian,
b=-current_gradient,
x0=self._x0, # this is None in this class
rtol=self._rtol_termination(current_point, current_gradient),
M=self._preconditioner, # this is None in this class
callback=_counter,
maxiter=self._max_cg_iters)[0]
self._statistics['HESSIAN_MATMULS'] += iteration_counter
logger.info(
'%s calculated direction with residual norm %.2e, while the input'
' gradient had norm %.2e, in %d iterations',
@@ -200,6 +214,45 @@ def _counter(_):
iteration_counter)
return result

class ExactDiagPreconditionedCGNewton(CGNewton):
"""CG with exact diagonal preconditioning."""

def get_direction(
self, current_point: np.array, current_gradient: np.array) -> np.array:

diag_H = np.array(
np.diag(_densify(self._hessian_function(current_point))))

diag_H[diag_H < 1e-4] = 1.
self._preconditioner = sp.sparse.diags(1./diag_H)
return super().get_direction(
current_point=current_point, current_gradient=current_gradient)


class DiagPreconditionedCGNewton(CGNewton):
"""CG with diagonal preconditioning."""

def __init__(self, matrix, b, c, zero, **kwargs):

self._matrix = matrix
self._b = b
m = len(b)
self._c = c
n = len(c)
self._zero = zero
gap_part = np.concatenate([self._c, self._b])**2
col_norms = sp.sparse.linalg.norm(matrix, axis=0)**2
assert len(col_norms) == n
row_norms = sp.sparse.linalg.norm(matrix, axis=1)**2
assert len(row_norms) == m
diag = gap_part
diag[:n] += col_norms / np.sqrt(2)
diag[n:] += row_norms
diag[n+zero:] += 0.5
self._preconditioner = sp.sparse.diags(1./diag)
super().__init__(**kwargs)


class WarmStartedCGNewton(CGNewton):
"""Calculate descent direction using warm-started Newton CG.
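For context on the `_preconditioner` attribute and the `M=self._preconditioner` argument introduced above: SciPy's `cg`/`minres` treat `M` as an approximate inverse of the system matrix. Below is a minimal standalone sketch of Jacobi (inverse-diagonal) preconditioning; the SPD test matrix and tolerance are made up for illustration and are not taken from the repository.

# Sketch (not part of the commit): Jacobi preconditioning passed to SciPy's
# CG through the `M` argument, as CGNewton does with self._preconditioner.
import numpy as np
import scipy as sp
import scipy.sparse.linalg

rng = np.random.default_rng(0)
B = rng.standard_normal((50, 50))
A = sp.sparse.csc_matrix(B.T @ B + 50. * np.eye(50))  # SPD test matrix

diag_A = A.diagonal().copy()
diag_A[diag_A < 1e-4] = 1.  # guard tiny entries, as ExactDiagPreconditionedCGNewton does
M = sp.sparse.diags(1. / diag_A)  # approximate inverse of A

b = rng.standard_normal(50)
x, info = sp.sparse.linalg.cg(A, b, M=M, rtol=1e-8)
assert info == 0  # converged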
28 changes: 17 additions & 11 deletions project_euromir/line_searcher.py
@@ -41,6 +41,10 @@ class LineSearchError(Exception):
class LineSearcher:
"""Base class for line search algorithms using strong Wolfe conditions."""

@property
def statistics(self):
return self._statistics if hasattr(self, '_statistics') else {}

def __init__(
self, initial_step = 1., c_1=1e-4, c_2=0.9):
"""Initialize with parameters.
@@ -80,18 +84,16 @@ def get_next(
def armijo(
self,
current_loss: np.array,
current_gradient: np.array,
direction: np.array,
direction_dot_current_gradient: float,
step_size: float,
proposed_loss: np.array):
"""Is Armijo rule satisfied?
:param current_loss: Loss at current point.
:type current_loss: float
:param current_gradient: Gradient at current point.
:type current_gradient: np.array
:param direction: Search direction.
:type direction: np.array
:param direction_dot_current_gradient: Search direction dot gradient
at current point.
:type direction_dot_current_gradient: float
:param step_size: Length of the step along direction.
:type step_size: float
:param proposed_loss: Loss at proposed point.
@@ -100,9 +102,10 @@ def armijo(
:returns: Is Armijo rule satisfied?
:rtype: bool
"""
assert direction_dot_current_gradient < 0

return proposed_loss <= current_loss + self._c_1 * step_size * (
direction @ current_gradient)
direction_dot_current_gradient)

def strong_curvature(
self,
@@ -145,6 +148,7 @@ def __init__(
self._loss_function = loss_function
self._backtrack = backtrack
self._max_iters = max_iters
self._statistics = {'LOSS_EVALS': 0}
super().__init__(initial_step=initial_step, c_1=c_1, c_2=None)

def get_next(
@@ -164,21 +168,23 @@ def get_next(
:returns: Next point.
:rtype: np.array
"""
assert current_gradient @ direction < 0
direction_dot_current_gradient = current_gradient @ direction
assert direction_dot_current_gradient < 0
step_size = float(self._initial_step)
proposed_point = np.empty_like(current_point)

for _ in range(self._max_iters):
for bt_iters in range(self._max_iters):
proposed_point[:] = current_point + direction * step_size
proposed_loss = self._loss_function(proposed_point)
if self.armijo(
current_loss=current_loss,
current_gradient=current_gradient, direction=direction,
direction_dot_current_gradient=direction_dot_current_gradient,
step_size=step_size, proposed_loss=proposed_loss):
logger.info(
'%s: step length %.2e, function calls %d, current loss '
'%.2e, previous loss %.2e', self.__class__.__name__,
step_size, _+1, proposed_loss, current_loss)
step_size, bt_iters+1, proposed_loss, current_loss)
self._statistics['LOSS_EVALS'] += bt_iters + 1  # one loss evaluation per backtracking iteration
return (proposed_point, proposed_loss, None)
step_size *= self._backtrack
raise LineSearchError(
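For reference, the sufficient-decrease test that `armijo` now receives as the precomputed `direction_dot_current_gradient` is f(x + t*d) <= f(x) + c_1 * t * (g @ d). Below is a minimal standalone sketch of the backtracking loop on a made-up quadratic loss; names and constants are illustrative, not the module's API.

# Sketch (not part of the commit): backtracking line search with the Armijo
# (sufficient decrease) condition, as in BacktrackingLineSearcher.get_next.
import numpy as np

def backtrack(loss, x, g, d, t0=1., c1=1e-4, shrink=0.9, max_iters=100):
    gd = g @ d
    assert gd < 0, 'd must be a descent direction'
    f0 = loss(x)
    t = t0
    for _ in range(max_iters):
        if loss(x + t * d) <= f0 + c1 * t * gd:  # Armijo rule
            return t
        t *= shrink
    raise RuntimeError('line search failed')

# toy problem: loss(x) = ||x||^2 / 2, gradient g = x, steepest descent d = -x
x = np.array([3., -4.])
print(backtrack(lambda z: 0.5 * z @ z, x, g=x, d=-x))  # accepts t = 1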
51 changes: 33 additions & 18 deletions project_euromir/minamide.py
@@ -56,7 +56,7 @@ def hessian_x_nogap(x, m, n, zero, matrix, b, regularizer = 0.):

def _matvec(dx):
result = np.empty_like(dx)
result[:] = 2 * (matrix.T @ (s_mask * (matrix @ dx)))
result[:] = (matrix.T @ (s_mask * (matrix @ dx)))
return result + regularizer * dx

return sp.sparse.linalg.LinearOperator(
@@ -76,8 +76,8 @@ def _matvec(dy):
result = np.empty_like(dy)

# dual residual sqnorm
result[:] = 2 * (matrix @ (matrix.T @ dy))
result[zero:] += 2 * y_mask * dy[zero:]
result[:] = (matrix @ (matrix.T @ dy))
result[zero:] += y_mask * dy[zero:]

return result + regularizer * dy

@@ -93,7 +93,7 @@ def _matvec(dy):
class MinamideTest(DirectionCalculator):

def __init__(self, b, c, h_x_nogap, h_y_nogap):
self._constants_sqrt2 = np.concatenate([c, b]) * np.sqrt(2.)
self._constants = np.concatenate([c, b])
self._hessian_x_nogap = h_x_nogap
self._hessian_y_nogap = h_y_nogap
self._n = len(c)
@@ -107,27 +107,42 @@ def get_direction(self, current_point, current_gradient):
# gets stuck on directions of little descent, not sure how much
# regularization is needed

# make all dense for test
hx = _densify(self._hessian_x_nogap(current_point[:self._n]))# + np.eye(self._n) * REGU
hy = _densify(self._hessian_y_nogap(current_point[self._n:]))# + np.eye(self._m) * REGU
# linear operators
hx = self._hessian_x_nogap(current_point[:self._n])# + np.eye(self._n) * REGU
hy = self._hessian_y_nogap(current_point[self._n:])# + np.eye(self._m) * REGU

# minamide notation
S = sp.sparse.bmat([
[hx, None],
[None, hy],
]).tocsc()
phi = self._constants_sqrt2

# S = sp.sparse.bmat([
# [hx, None],
# [None, hy],
# ]).tocsc()

def _s_matvec(array):
return np.concatenate([
hx @ array[:self._n], hy @ array[self._n:]
])

S = sp.sparse.linalg.LinearOperator(
shape = (self._n + self._m, self._n + self._m),
matvec = _s_matvec
)

phi = self._constants

hx_dense = _densify(hx)
hy_dense = _densify(hy)

def _splus_matvec(array):
return np.concatenate([
np.linalg.lstsq(hx, array[:self._n], rcond=None)[0],
np.linalg.lstsq(hy, array[self._n:], rcond=None)[0]
np.linalg.lstsq(hx_dense, array[:self._n], rcond=None)[0],
np.linalg.lstsq(hy_dense, array[self._n:], rcond=None)[0]
])

# def _splus_matvec(array):
# return np.concatenate([
# sp.sparse.linalg.cg(hx, array[:self._n], rtol=1e-5)[0],
# sp.sparse.linalg.cg(hy, array[self._n:], rtol=1e-5)[0]
# sp.sparse.linalg.cg(hx, array[:self._n], rtol=min(0.5, np.linalg.norm(current_gradient)))[0],
# sp.sparse.linalg.cg(hy, array[self._n:], rtol=min(0.5, np.linalg.norm(current_gradient)))[0]
# ])

# def _splus_matvec(array):
@@ -265,7 +280,7 @@ def my_hessian_y_nogap(y):
[hess_x_ng, np.zeros((n, m))],
[np.zeros((m, n)), hess_y_ng]])
gap_constants = np.concatenate([c, b])
hess_rebuilt += np.outer(gap_constants, gap_constants) * 2.
hess_rebuilt += np.outer(gap_constants, gap_constants)
assert np.allclose(hess, hess_rebuilt)
print('\tOK!')

@@ -285,7 +300,7 @@ def my_hessian_y_nogap(y):
S = np.bmat([
[hess_x_ng, np.zeros((n, m))],
[np.zeros((m, n)), hess_y_ng]]).A
phi = np.concatenate([c, b]) * np.sqrt(2.)
phi = np.concatenate([c, b])

# obtain pinv of the reduced system
Splus = np.linalg.pinv(S)
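The structure the test above verifies (with the factors of 2 now removed) is that the full Hessian splits into the block-diagonal no-gap part S plus the rank-one gap term phi phi^T, phi = [c; b]. A hedged sketch assembling that operator with SciPy's LinearOperator follows; the function name and argument layout are illustrative, not the module's API.

# Sketch (not part of the commit): full Hessian as the block-diagonal no-gap
# part S plus the rank-one gap term, mirroring the hess_rebuilt check above.
import numpy as np
import scipy as sp
import scipy.sparse.linalg

def full_hessian_operator(hx, hy, c, b):
    """hx: (n, n) and hy: (m, m) linear operators; c, b: problem vectors."""
    n, m = len(c), len(b)
    phi = np.concatenate([c, b])

    def _matvec(d):
        s_part = np.concatenate([hx @ d[:n], hy @ d[n:]])  # block-diagonal S
        return s_part + phi * (phi @ d)                     # + phi (phi^T d)

    return sp.sparse.linalg.LinearOperator(
        shape=(n + m, n + m), matvec=_matvec)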
35 changes: 27 additions & 8 deletions project_euromir/solver_new.py
@@ -37,11 +37,10 @@
NOHSDE = True

from project_euromir import equilibrate
from project_euromir.direction_calculator import (CGNewton, DenseNewton,
LSMRLevenbergMarquardt,
LSQRLevenbergMarquardt,
WarmStartedCGNewton,
nocedal_wright_termination)
from project_euromir.direction_calculator import (
CGNewton, DenseNewton, DiagPreconditionedCGNewton,
ExactDiagPreconditionedCGNewton, LSMRLevenbergMarquardt,
LSQRLevenbergMarquardt, WarmStartedCGNewton, nocedal_wright_termination)
from project_euromir.line_searcher import (BacktrackingLineSearcher,
LogSpaceLineSearcher,
ScipyLineSearcher)
@@ -147,12 +146,29 @@ def _local_derivative_residual(xy):
regularizer=1e-8, # it seems 1e-10 is best, but it's too sensitive to it :(
)

direction_calculator = CGNewton(
# doesn't improve, yet; we can make many tests
# direction_calculator = DiagPreconditionedCGNewton(
# matrix=matrix_transf,
# b=b_transf,
# c=c_transf,
# zero=zero,
# hessian_function=_local_hessian,
# rtol_termination=lambda x, g: min(0.5, np.linalg.norm(g)), #,**2),
# max_cg_iters=None,
# )

# this one also doesn't seem to improve; must be because my diagonal is
# already quite well scaled, and/or it interacts with the rank 1 part;
# makes sense to try diag plus rank 1;
# direction_calculator = ExactDiagPreconditionedCGNewton(
direction_calculator = CGNewton(
# warm start causes issues if null space changes b/w iterations
hessian_function=_local_hessian,
rtol_termination=lambda x, g: min(0.5, np.linalg.norm(g)),
rtol_termination=lambda x, g: min(0.5, np.linalg.norm(g)), #,**2),
max_cg_iters=None,
minres=False,
# minres=True, # less stable, needs more stringent termination,
# and/or better logic to transition to refinement, but it is a bit faster
# it seems
# regularizer=1e-8, # it seems 1e-10 is best, but it's too sensitive to it :(
)

@@ -215,6 +231,9 @@ def _local_derivative_residual(xy):
'Certificates not yet implemented.')

print('Newton-CG loop took %.2e seconds' % (time.time() - _start ))
print('Newton-CG iterations', newton_iterations)
print('DirectionCalculator statistics', direction_calculator.statistics)
print('LineSearcher statistics', line_searcher.statistics)

# create HSDE variables for refinement
u = np.empty(n+m+1, dtype=float)
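The `rtol_termination` lambda above acts as the forcing term of an inexact Newton method: CG stops once the relative residual falls below min(0.5, ||g||), while the commented `**2` variant asks for a tighter inner solve once the gradient norm drops below one, trading more HESSIAN_MATMULS per outer iteration for potentially fewer Newton steps. A tiny illustrative sketch of the two rules written as plain functions (not part of the commit):

# Sketch (not part of the commit): the two CG-termination rules referenced
# above, with the same (point, gradient) signature used by rtol_termination.
import numpy as np

def rtol_linear(_x, g):
    # looser inner solves, fewer Hessian matvecs per Newton step
    return min(0.5, np.linalg.norm(g))

def rtol_quadratic(_x, g):
    # tighter inner solves as the gradient shrinks below one
    return min(0.5, np.linalg.norm(g)**2)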
