diff --git a/project_euromir/loss_no_hsde.py b/project_euromir/loss_no_hsde.py
index d1b30b2..ec027c5 100644
--- a/project_euromir/loss_no_hsde.py
+++ b/project_euromir/loss_no_hsde.py
@@ -93,7 +93,7 @@ def loss_gradient(xy, m, n, zero, matrix, b, c, workspace):
     return loss, workspace['gradient']
 
 
-def hessian(xy, m, n, zero, matrix, b, c, workspace):
+def hessian(xy, m, n, zero, matrix, b, c, workspace, regularizer = 0.):
     """Hessian to use inside LBFGS loop."""
 
     locals().update(workspace)
@@ -136,7 +136,7 @@ def _matvec(dxdy):
             constants = np.concatenate([c, b])
             result[:] += constants * (2 * (constants @ dxdy))
 
-        return result
+        return result + regularizer * dxdy
 
     return sp.sparse.linalg.LinearOperator(
         shape=(len(xy), len(xy)),
diff --git a/project_euromir/newton_cg.py b/project_euromir/newton_cg.py
index fa97d2e..ec521d5 100644
--- a/project_euromir/newton_cg.py
+++ b/project_euromir/newton_cg.py
@@ -12,7 +12,7 @@
                                  line_search_wolfe2)
 from scipy.sparse.linalg import LinearOperator
 
-ENZO_MODIFIED_MULTIPLIER = 1e-16 # in original it was 3.
+ENZO_MODIFIED_MULTIPLIER = 1e-8 # in original it was 3.
 
 _epsilon = sqrt(np.finfo(float).eps)
 
@@ -368,10 +368,12 @@ def terminate(warnflag, msg):
             # check curvature
             Ap = asarray(Ap).squeeze() # get rid of matrices...
             curv = np.dot(psupi, Ap)
-            if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * float64eps:
+            if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * dri0:
                 # print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}')
                 break
             elif curv < 0:
+                print('NEGATIVE CURVATURE CLAUSE TRIGGERED!!!!')
+                breakpoint()
                 if (i > 0):
                     break
                 else:
@@ -402,8 +404,8 @@ def terminate(warnflag, msg):
         except _LineSearchError:
             print('ENTERING FALL-BACK BACKTRACKING')
             # ENZO: fallback to back-tracking
-            for bktrit in range(50):
-                if f(xk) < 1e-20:
+            for bktrit in range(100):
+                if f(xk) < 1e-25:
                     continue # we want to go to the else clause
                 if f(xk + pk * 0.5**(-bktrit)) < f(xk):
                     alphak = 0.5**(-bktrit)
diff --git a/project_euromir/solver_cg.py b/project_euromir/solver_cg.py
index 293effe..544846d 100644
--- a/project_euromir/solver_cg.py
+++ b/project_euromir/solver_cg.py
@@ -39,11 +39,11 @@
 from project_euromir.lbfgs import minimize_lbfgs
 
 if not NOHSDE:
-    from project_euromir.loss import (common_computation_main,
+    from project_euromir.loss import (_densify, common_computation_main,
                                       create_workspace_main, gradient,
                                       hessian, loss)
 else:
-    from project_euromir.loss_no_hsde import (create_workspace, loss_gradient, hessian)
+    from project_euromir.loss_no_hsde import (create_workspace, loss_gradient, hessian, _densify)
 
 from project_euromir.newton_cg import _epsilon, _minimize_newtoncg
 from project_euromir.refinement import refine
@@ -52,14 +52,23 @@
 if DEBUG:
     import matplotlib.pyplot as plt
 
-QR_PRESOLVE = False
+QR_PRESOLVE = True
 QR_PRESOLVE_AFTER = False
 REFINE = True
 
+CG_REGULARIZER = 0.
+
+SCIPY_EXPERIMENTS = False
+
+HESSIAN_COUNTER = {'ba': 0}
 
 def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10):
     "Main function."
 
+    print(
+        f'PROBLEM SIZE: m={len(b)}, n={len(c)}, zero={zero},'
+        f' nonneg={nonneg}, nnz(matrix)={matrix.nnz}')
+
     # some shape checking
     n = len(c)
     m = len(b)
@@ -131,7 +140,17 @@
 
     def separated_hessian(xy):
         return hessian(
-            xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)
+            xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace, regularizer=CG_REGULARIZER)
+
+    def dense_hessian(xy):
+        return _densify(hessian(
+            xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace, regularizer=1e-10))
+
+    def hessp(xy, var):
+        # breakpoint()
+        HESSIAN_COUNTER['ba'] += 1
+        H = separated_hessian(xy)
+        return H @ var
 
     x_0 = np.zeros(n+m)
 
@@ -148,30 +167,71 @@ def callback(current):
     #     # current['x'] /= current.x[-1]
     #     old_x[:] = current.x
 
+    # import matplotlib.pyplot as plt
+    # all_losses = []
+    # def callback_plot(x):
+    #     all_losses.append(separated_loss(x))
+
     start = time.time()
     # call newton-CG, implementation from scipy with modified termination
     c_2 = 0.1
     for i in range(5):
-        result = _minimize_newtoncg(
-            fun=separated_loss,
-            x0=x_0,
-            args=(),
-            jac=separated_grad,
-            hess=separated_hessian,
-            hessp=None,
-            # callback=callback,
-            xtol=0., #1e-5,
-            eps=_epsilon, # unused
-            maxiter=10000,
-            # max_cg_iters=100,
-            disp=99,
-            return_all=False,
-            c1=1e-4, c2=c_2)
+        if not SCIPY_EXPERIMENTS:
+            result = _minimize_newtoncg(
+                fun=separated_loss,
+                x0=x_0,
+                args=(),
+                jac=separated_grad,
+                hess=separated_hessian,
+                hessp=None,
+                # callback=callback,
+                xtol=0., #1e-5,
+                eps=_epsilon, # unused
+                maxiter=10000,
+                # max_cg_iters=100,
+                disp=99,
+                return_all=False,
+                c1=1e-4, c2=c_2)
+            print(result)
+        else:
+            # NO, trust region (at least scipy implementations) don't work as
+            # well, I tried a bunch, only realistic competitor is l-bfgs
+            #global HESSIAN_COUNTER
+            HESSIAN_COUNTER['ba'] = 0
+            result = sp.optimize.minimize(
+                fun=separated_loss,
+                x0=x_0,
+                args=(),
+                jac=separated_grad,
+                # hess=dense_hessian, # for dogleg
+                #callback=lambda x: print(separated_loss(x)),
+                callback=callback_plot,
+                hessp=hessp, # for trust-ncg
+                options = {
+                    'initial_trust_radius': .01,
+                    'max_trust_radius': .1,
+                    'eta': 0., 'gtol': 1e-16,
+                    'ftol': 0.,
+                    'gtol': 0.,
+                    'maxfun': 1e10,
+                    'maxiter': 1e10,
+                    'maxls': 100,
+                    'maxcor': 20,
+                },
+                # method='dogleg',
+                # method='trust-ncg',
+                method='l-bfgs-b',
+            )
+            print("NUMBER HESSIAN MATMULS", HESSIAN_COUNTER['ba'])
+            print(result)
+            # plt.plot(all_losses)
+            # plt.yscale('log')
+            # plt.show()
         # breakpoint()
         print(f'NEWTON-CG took {time.time() - start:.3f} seconds')
         loss_after_cg = separated_loss(result["x"])
         print(f'LOSS {loss_after_cg:.2e}')
-        if loss_after_cg < 1e-20:
+        if loss_after_cg < 1e-25:
             break
         x_0 = result['x']
     # breakpoint()