From b8cae2bf54064b2f29323e9d9f71c2225846c7fd Mon Sep 17 00:00:00 2001 From: Enzo Busseti Date: Sat, 29 Jun 2024 18:30:16 +0400 Subject: [PATCH] many experiments; made line search in ncg more aggressive; activated QR presolver (need to write sparse QR, I am afraid); main issue is (not with these tests, but increasing probl sizes) sometimes approx Newton direction is not good and we make super small steps, some logic around it to truncate Netwon CG much earlier in those situations, ~steepest descent, would probably suffice; also to explore Woodbury on gap part of the Hessian, keeping saved solution of the (b,c) on the non-gap part; using QR on matrix, CG solving on the non-gap part should be much easier. --- project_euromir/loss_no_hsde.py | 4 +- project_euromir/newton_cg.py | 10 ++-- project_euromir/solver_cg.py | 100 +++++++++++++++++++++++++------- 3 files changed, 88 insertions(+), 26 deletions(-) diff --git a/project_euromir/loss_no_hsde.py b/project_euromir/loss_no_hsde.py index d1b30b2..ec027c5 100644 --- a/project_euromir/loss_no_hsde.py +++ b/project_euromir/loss_no_hsde.py @@ -93,7 +93,7 @@ def loss_gradient(xy, m, n, zero, matrix, b, c, workspace): return loss, workspace['gradient'] -def hessian(xy, m, n, zero, matrix, b, c, workspace): +def hessian(xy, m, n, zero, matrix, b, c, workspace, regularizer = 0.): """Hessian to use inside LBFGS loop.""" locals().update(workspace) @@ -136,7 +136,7 @@ def _matvec(dxdy): constants = np.concatenate([c, b]) result[:] += constants * (2 * (constants @ dxdy)) - return result + return result + regularizer * dxdy return sp.sparse.linalg.LinearOperator( shape=(len(xy), len(xy)), diff --git a/project_euromir/newton_cg.py b/project_euromir/newton_cg.py index fa97d2e..ec521d5 100644 --- a/project_euromir/newton_cg.py +++ b/project_euromir/newton_cg.py @@ -12,7 +12,7 @@ line_search_wolfe2) from scipy.sparse.linalg import LinearOperator -ENZO_MODIFIED_MULTIPLIER = 1e-16 # in original it was 3. +ENZO_MODIFIED_MULTIPLIER = 1e-8 # in original it was 3. _epsilon = sqrt(np.finfo(float).eps) @@ -368,10 +368,12 @@ def terminate(warnflag, msg): # check curvature Ap = asarray(Ap).squeeze() # get rid of matrices... curv = np.dot(psupi, Ap) - if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * float64eps: + if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * dri0: # print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}') break elif curv < 0: + print('NEGATIVE CURVATURE CLAUSE TRIGGERED!!!!') + breakpoint() if (i > 0): break else: @@ -402,8 +404,8 @@ def terminate(warnflag, msg): except _LineSearchError: print('ENTERING FALL-BACK BACKTRACKING') # ENZO: fallback to back-tracking - for bktrit in range(50): - if f(xk) < 1e-20: + for bktrit in range(100): + if f(xk) < 1e-25: continue # we want to go to the else clause if f(xk + pk * 0.5**(-bktrit)) < f(xk): alphak = 0.5**(-bktrit) diff --git a/project_euromir/solver_cg.py b/project_euromir/solver_cg.py index 293effe..544846d 100644 --- a/project_euromir/solver_cg.py +++ b/project_euromir/solver_cg.py @@ -39,11 +39,11 @@ from project_euromir.lbfgs import minimize_lbfgs if not NOHSDE: - from project_euromir.loss import (common_computation_main, + from project_euromir.loss import (_densify, common_computation_main, create_workspace_main, gradient, hessian, loss) else: - from project_euromir.loss_no_hsde import (create_workspace, loss_gradient, hessian) + from project_euromir.loss_no_hsde import (create_workspace, loss_gradient, hessian, _densify) from project_euromir.newton_cg import _epsilon, _minimize_newtoncg from project_euromir.refinement import refine @@ -52,14 +52,23 @@ if DEBUG: import matplotlib.pyplot as plt -QR_PRESOLVE = False +QR_PRESOLVE = True QR_PRESOLVE_AFTER = False REFINE = True +CG_REGULARIZER = 0. + +SCIPY_EXPERIMENTS = False + +HESSIAN_COUNTER = {'ba': 0} def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10): "Main function." + print( + f'PROBLEM SIZE: m={len(b)}, n={len(c)}, zero={zero},' + f' nonneg={nonneg}, nnz(matrix)={matrix.nnz}') + # some shape checking n = len(c) m = len(b) @@ -131,7 +140,17 @@ def separated_grad(xy): def separated_hessian(xy): return hessian( - xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace) + xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace, regularizer=CG_REGULARIZER) + + def dense_hessian(xy): + return _densify(hessian( + xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace, regularizer=1e-10)) + + def hessp(xy, var): + # breakpoint() + HESSIAN_COUNTER['ba'] += 1 + H = separated_hessian(xy) + return H @ var x_0 = np.zeros(n+m) @@ -148,30 +167,71 @@ def callback(current): # # current['x'] /= current.x[-1] # old_x[:] = current.x + # import matplotlib.pyplot as plt + # all_losses = [] + # def callback_plot(x): + # all_losses.append(separated_loss(x)) + start = time.time() # call newton-CG, implementation from scipy with modified termination c_2 = 0.1 for i in range(5): - result = _minimize_newtoncg( - fun=separated_loss, - x0=x_0, - args=(), - jac=separated_grad, - hess=separated_hessian, - hessp=None, - # callback=callback, - xtol=0., #1e-5, - eps=_epsilon, # unused - maxiter=10000, - # max_cg_iters=100, - disp=99, - return_all=False, - c1=1e-4, c2=c_2) + if not SCIPY_EXPERIMENTS: + result = _minimize_newtoncg( + fun=separated_loss, + x0=x_0, + args=(), + jac=separated_grad, + hess=separated_hessian, + hessp=None, + # callback=callback, + xtol=0., #1e-5, + eps=_epsilon, # unused + maxiter=10000, + # max_cg_iters=100, + disp=99, + return_all=False, + c1=1e-4, c2=c_2) + print(result) + else: + # NO, trust region (at least scipy implementations) don't work as + # well, I tried a bunch, only realistic competitor is l-bfgs + #global HESSIAN_COUNTER + HESSIAN_COUNTER['ba'] = 0 + result = sp.optimize.minimize( + fun=separated_loss, + x0=x_0, + args=(), + jac=separated_grad, + # hess=dense_hessian, # for dogleg + #callback=lambda x: print(separated_loss(x)), + callback=callback_plot, + hessp=hessp, # for trust-ncg + options = { + 'initial_trust_radius': .01, + 'max_trust_radius': .1, + 'eta': 0., 'gtol': 1e-16, + 'ftol': 0., + 'gtol': 0., + 'maxfun': 1e10, + 'maxiter': 1e10, + 'maxls': 100, + 'maxcor': 20, + }, + # method='dogleg', + # method='trust-ncg', + method='l-bfgs-b', + ) + print("NUMBER HESSIAN MATMULS", HESSIAN_COUNTER['ba']) + print(result) + # plt.plot(all_losses) + # plt.yscale('log') + # plt.show() # breakpoint() print(f'NEWTON-CG took {time.time() - start:.3f} seconds') loss_after_cg = separated_loss(result["x"]) print(f'LOSS {loss_after_cg:.2e}') - if loss_after_cg < 1e-20: + if loss_after_cg < 1e-25: break x_0 = result['x'] # breakpoint()