diff --git a/project_euromir/direction_calculator.py b/project_euromir/direction_calculator.py index 2461fb7..6c699db 100644 --- a/project_euromir/direction_calculator.py +++ b/project_euromir/direction_calculator.py @@ -58,6 +58,10 @@ def nocedal_wright_termination(_current_point, current_gradient): class DirectionCalculator: """Base class for descent direction calculation.""" + @property + def statistics(self): + return self._statistics if hasattr(self, '_statistics') else {} + def get_direction( self, current_point: np.array, current_gradient: np.array) -> np.array: """Calculate descent direction at current point given current gradient. @@ -133,6 +137,7 @@ class CGNewton(DenseNewton): """Calculate descent direction using Newton-CG.""" _x0 = None # overwritten in derived class(es) + _preconditioner = None # overwritten in derived class(es) def __init__( self, hessian_function, @@ -158,6 +163,7 @@ def __init__( self._max_cg_iters = max_cg_iters self._regularizer = regularizer self._minres = minres + self._statistics = {'HESSIAN_MATMULS': 0} super().__init__(hessian_function=hessian_function) def get_direction( @@ -184,13 +190,21 @@ def _counter(_): shape = current_hessian.shape, matvec = lambda x: orig_hessian @ x + self._regularizer * x ) + # breakpoint() + # diag = np.diag(self._preconditioner.todense()) + # real_diag = np.diag( _densify(current_hessian)) + # import matplotlib.pyplot as plt + # plt.plot(diag); plt.plot(real_diag); plt.show() + # breakpoint() result = getattr(sp.sparse.linalg, 'minres' if self._minres else 'cg')( A=current_hessian, b=-current_gradient, x0=self._x0, # this is None in this class rtol=self._rtol_termination(current_point, current_gradient), + M=self._preconditioner, # this is None in this class callback=_counter, maxiter=self._max_cg_iters)[0] + self._statistics['HESSIAN_MATMULS'] += iteration_counter logger.info( '%s calculated direction with residual norm %.2e, while the input' ' gradient had norm %.2e, in %d iterations', @@ -200,6 +214,45 @@ def _counter(_): iteration_counter) return result +class ExactDiagPreconditionedCGNewton(CGNewton): + """CG with exact diagonal preconditioning.""" + + def get_direction( + self, current_point: np.array, current_gradient: np.array) -> np.array: + + diag_H = np.array( + np.diag(_densify(self._hessian_function(current_point)))) + + diag_H[diag_H < 1e-4] = 1. + self._preconditioner = sp.diags(1./diag_H) + return super().get_direction( + current_point=current_point, current_gradient=current_gradient) + + +class DiagPreconditionedCGNewton(CGNewton): + """CG with diagonal preconditioning.""" + + def __init__(self, matrix, b, c, zero, **kwargs): + + self._matrix = matrix + self._b = b + m = len(b) + self._c = c + n = len(c) + self._zero = zero + gap_part = np.concatenate([self._c, self._b])**2 + col_norms = sp.sparse.linalg.norm(matrix, axis=0)**2 + assert len(col_norms) == n + row_norms = sp.sparse.linalg.norm(matrix, axis=1)**2 + assert len(row_norms) == m + diag = gap_part + diag[:n] += col_norms / np.sqrt(2) + diag[n:] += row_norms + diag[n+zero:] += 0.5 + self._preconditioner = sp.sparse.diags(1./diag) + super().__init__(**kwargs) + + class WarmStartedCGNewton(CGNewton): """Calculate descent direction using warm-started Newton CG. diff --git a/project_euromir/line_searcher.py b/project_euromir/line_searcher.py index 936242a..15bad5a 100644 --- a/project_euromir/line_searcher.py +++ b/project_euromir/line_searcher.py @@ -41,6 +41,10 @@ class LineSearchError(Exception): class LineSearcher: """Base class for line search algorithms using strong Wolfe conditions.""" + @property + def statistics(self): + return self._statistics if hasattr(self, '_statistics') else {} + def __init__( self, initial_step = 1., c_1=1e-4, c_2=0.9): """Initialize with parameters. @@ -80,18 +84,16 @@ def get_next( def armijo( self, current_loss: np.array, - current_gradient: np.array, - direction: np.array, + direction_dot_current_gradient: float, step_size: float, proposed_loss: np.array): """Is Armijo rule satisfied? :param current_loss: Loss at current point. :type current_loss: float - :param current_gradient: Gradient at current point. - :type current_gradient: np.array - :param direction: Search direction. - :type direction: np.array + :param direction_dot_current_gradient: Search direction dot gradient + at current point. + :type direction_dot_current_gradient: float :param step_size: Length of the step along direction. :type step_size: float :param proposed_loss: Loss at proposed point. @@ -100,9 +102,10 @@ def armijo( :returns: Is Armijo rule satisfied? :rtype: bool """ + assert direction_dot_current_gradient < 0 return proposed_loss <= current_loss + self._c_1 * step_size * ( - direction @ current_gradient) + direction_dot_current_gradient) def strong_curvature( self, @@ -145,6 +148,7 @@ def __init__( self._loss_function = loss_function self._backtrack = backtrack self._max_iters = max_iters + self._statistics = {'LOSS_EVALS': 0} super().__init__(initial_step=initial_step, c_1=c_1, c_2=None) def get_next( @@ -164,21 +168,23 @@ def get_next( :returns: Next point. :rtype: np.array """ - assert current_gradient @ direction < 0 + direction_dot_current_gradient = current_gradient @ direction + assert direction_dot_current_gradient < 0 step_size = float(self._initial_step) proposed_point = np.empty_like(current_point) - for _ in range(self._max_iters): + for bt_iters in range(self._max_iters): proposed_point[:] = current_point + direction * step_size proposed_loss = self._loss_function(proposed_point) if self.armijo( current_loss=current_loss, - current_gradient=current_gradient, direction=direction, + direction_dot_current_gradient=direction_dot_current_gradient, step_size=step_size, proposed_loss=proposed_loss): logger.info( '%s: step lenght %.2e, function calls %d, current loss ' '%.2e, previous loss %.2e', self.__class__.__name__, - step_size, _+1, proposed_loss, current_loss) + step_size, bt_iters+1, proposed_loss, current_loss) + self._statistics['LOSS_EVALS'] += bt_iters return (proposed_point, proposed_loss, None) step_size *= self._backtrack raise LineSearchError( diff --git a/project_euromir/minamide.py b/project_euromir/minamide.py index 96107c9..4e64955 100644 --- a/project_euromir/minamide.py +++ b/project_euromir/minamide.py @@ -56,7 +56,7 @@ def hessian_x_nogap(x, m, n, zero, matrix, b, regularizer = 0.): def _matvec(dx): result = np.empty_like(dx) - result[:] = 2 * (matrix.T @ (s_mask * (matrix @ dx))) + result[:] = (matrix.T @ (s_mask * (matrix @ dx))) return result + regularizer * dx return sp.sparse.linalg.LinearOperator( @@ -76,8 +76,8 @@ def _matvec(dy): result = np.empty_like(dy) # dual residual sqnorm - result[:] = 2 * (matrix @ (matrix.T @ dy)) - result[zero:] += 2 * y_mask * dy[zero:] + result[:] = (matrix @ (matrix.T @ dy)) + result[zero:] += y_mask * dy[zero:] return result + regularizer * dy @@ -93,7 +93,7 @@ def _matvec(dy): class MinamideTest(DirectionCalculator): def __init__(self, b, c, h_x_nogap, h_y_nogap): - self._constants_sqrt2 = np.concatenate([c, b]) * np.sqrt(2.) + self._constants = np.concatenate([c, b]) self._hessian_x_nogap = h_x_nogap self._hessian_y_nogap = h_y_nogap self._n = len(c) @@ -107,27 +107,42 @@ def get_direction(self, current_point, current_gradient): # gets stuck on directions of little descent, not sure how much # regularization is needed - # make all dense for test - hx = _densify(self._hessian_x_nogap(current_point[:self._n]))# + np.eye(self._n) * REGU - hy = _densify(self._hessian_y_nogap(current_point[self._n:]))# + np.eye(self._m) * REGU + # linear operators + hx = self._hessian_x_nogap(current_point[:self._n])# + np.eye(self._n) * REGU + hy = self._hessian_y_nogap(current_point[self._n:])# + np.eye(self._m) * REGU # minamide notation - S = sp.sparse.bmat([ - [hx, None], - [None, hy], - ]).tocsc() - phi = self._constants_sqrt2 + + # S = sp.sparse.bmat([ + # [hx, None], + # [None, hy], + # ]).tocsc() + + def _s_matvec(array): + return np.concatenate([ + hx @ array[:self._n], hy @ array[self._n:] + ]) + + S = sp.sparse.linalg.LinearOperator( + shape = (self._n + self._m, self._n + self._m), + matvec = _s_matvec + ) + + phi = self._constants + + hx_dense = _densify(hx) + hy_dense = _densify(hy) def _splus_matvec(array): return np.concatenate([ - np.linalg.lstsq(hx, array[:self._n], rcond=None)[0], - np.linalg.lstsq(hy, array[self._n:], rcond=None)[0] + np.linalg.lstsq(hx_dense, array[:self._n], rcond=None)[0], + np.linalg.lstsq(hy_dense, array[self._n:], rcond=None)[0] ]) # def _splus_matvec(array): # return np.concatenate([ - # sp.sparse.linalg.cg(hx, array[:self._n], rtol=1e-5)[0], - # sp.sparse.linalg.cg(hy, array[self._n:], rtol=1e-5)[0] + # sp.sparse.linalg.cg(hx, array[:self._n], rtol=min(0.5, np.linalg.norm(current_gradient)))[0], + # sp.sparse.linalg.cg(hy, array[self._n:], rtol=min(0.5, np.linalg.norm(current_gradient)))[0] # ]) # def _splus_matvec(array): @@ -265,7 +280,7 @@ def my_hessian_y_nogap(y): [hess_x_ng, np.zeros((n, m))], [np.zeros((m, n)), hess_y_ng]]) gap_constants = np.concatenate([c, b]) - hess_rebuilt += np.outer(gap_constants, gap_constants) * 2. + hess_rebuilt += np.outer(gap_constants, gap_constants) assert np.allclose(hess, hess_rebuilt) print('\tOK!') @@ -285,7 +300,7 @@ def my_hessian_y_nogap(y): S = np.bmat([ [hess_x_ng, np.zeros((n, m))], [np.zeros((m, n)), hess_y_ng]]).A - phi = np.concatenate([c, b]) * np.sqrt(2.) + phi = np.concatenate([c, b]) # obtain pinv of the reduced system Splus = np.linalg.pinv(S) diff --git a/project_euromir/solver_new.py b/project_euromir/solver_new.py index 2dfaf89..a56fe42 100644 --- a/project_euromir/solver_new.py +++ b/project_euromir/solver_new.py @@ -37,11 +37,10 @@ NOHSDE = True from project_euromir import equilibrate -from project_euromir.direction_calculator import (CGNewton, DenseNewton, - LSMRLevenbergMarquardt, - LSQRLevenbergMarquardt, - WarmStartedCGNewton, - nocedal_wright_termination) +from project_euromir.direction_calculator import ( + CGNewton, DenseNewton, DiagPreconditionedCGNewton, + ExactDiagPreconditionedCGNewton, LSMRLevenbergMarquardt, + LSQRLevenbergMarquardt, WarmStartedCGNewton, nocedal_wright_termination) from project_euromir.line_searcher import (BacktrackingLineSearcher, LogSpaceLineSearcher, ScipyLineSearcher) @@ -147,12 +146,29 @@ def _local_derivative_residual(xy): regularizer=1e-8, # it seems 1e-10 is best, but it's too sensitive to it :( ) - direction_calculator = CGNewton( + # doesn't improve, yet; we can make many tests + # direction_calculator = DiagPreconditionedCGNewton( + # matrix=matrix_transf, + # b=b_transf, + # c=c_transf, + # zero=zero, + # hessian_function=_local_hessian, + # rtol_termination=lambda x, g: min(0.5, np.linalg.norm(g)), #,**2), + # max_cg_iters=None, + # ) + + # this one also doesn't seem to improve; must be because my diagonal is + # already quite well scaled, and/or it interacts with the rank 1 part; + # makes sense to try diag plus rank 1; + # direction_calculator = ExactDiagPreconditionedCGNewton( + direction_calculator = CGNewton( # warm start causes issues if null space changes b/w iterations hessian_function=_local_hessian, - rtol_termination=lambda x, g: min(0.5, np.linalg.norm(g)), + rtol_termination=lambda x, g: min(0.5, np.linalg.norm(g)), #,**2), max_cg_iters=None, - minres=False, + # minres=True, # less stable, needs more stringent termination, + # and/or better logic to transition to refinement, but it is a bit faster + # it seems # regularizer=1e-8, # it seems 1e-10 is best, but it's too sensitive to it :( ) @@ -215,6 +231,9 @@ def _local_derivative_residual(xy): 'Certificates not yet implemented.') print('Newton-CG loop took %.2e seconds' % (time.time() - _start )) + print('Newton-CG iterations', newton_iterations) + print('DirectionCalculator statistics', direction_calculator.statistics) + print('LineSearcher statistics', line_searcher.statistics) # create HSDE variables for refinement u = np.empty(n+m+1, dtype=float)