Skip to content

Commit

Permalink
many experiments;
Browse files Browse the repository at this point in the history
made line search in ncg more aggressive;
activated QR presolver (need to write sparse QR, I am afraid);
main issue is (not with these tests, but increasing probl sizes)
sometimes approx Newton direction is not good and we make super small
steps, some logic around it to truncate Netwon CG much earlier in those
situations, ~steepest descent, would probably suffice; also to explore
Woodbury on gap part of the Hessian, keeping saved solution of the
(b,c) on the non-gap part; using QR on matrix, CG solving on the non-gap part
should be much easier.
  • Loading branch information
enzbus committed Jun 29, 2024
1 parent 202aec2 commit b8cae2b
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 26 deletions.
4 changes: 2 additions & 2 deletions project_euromir/loss_no_hsde.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def loss_gradient(xy, m, n, zero, matrix, b, c, workspace):

return loss, workspace['gradient']

def hessian(xy, m, n, zero, matrix, b, c, workspace):
def hessian(xy, m, n, zero, matrix, b, c, workspace, regularizer = 0.):
"""Hessian to use inside LBFGS loop."""

locals().update(workspace)
Expand Down Expand Up @@ -136,7 +136,7 @@ def _matvec(dxdy):
constants = np.concatenate([c, b])
result[:] += constants * (2 * (constants @ dxdy))

return result
return result + regularizer * dxdy

return sp.sparse.linalg.LinearOperator(
shape=(len(xy), len(xy)),
Expand Down
10 changes: 6 additions & 4 deletions project_euromir/newton_cg.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
line_search_wolfe2)
from scipy.sparse.linalg import LinearOperator

ENZO_MODIFIED_MULTIPLIER = 1e-16 # in original it was 3.
ENZO_MODIFIED_MULTIPLIER = 1e-8 # in original it was 3.

_epsilon = sqrt(np.finfo(float).eps)

Expand Down Expand Up @@ -368,10 +368,12 @@ def terminate(warnflag, msg):
# check curvature
Ap = asarray(Ap).squeeze() # get rid of matrices...
curv = np.dot(psupi, Ap)
if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * float64eps:
if 0 <= curv <= ENZO_MODIFIED_MULTIPLIER * dri0:
# print(f'iter {k}, breaking CG loop at cgiter {k2} with curv {curv:.2e}')
break
elif curv < 0:
print('NEGATIVE CURVATURE CLAUSE TRIGGERED!!!!')
breakpoint()
if (i > 0):
break
else:
Expand Down Expand Up @@ -402,8 +404,8 @@ def terminate(warnflag, msg):
except _LineSearchError:
print('ENTERING FALL-BACK BACKTRACKING')
# ENZO: fallback to back-tracking
for bktrit in range(50):
if f(xk) < 1e-20:
for bktrit in range(100):
if f(xk) < 1e-25:
continue # we want to go to the else clause
if f(xk + pk * 0.5**(-bktrit)) < f(xk):
alphak = 0.5**(-bktrit)
Expand Down
100 changes: 80 additions & 20 deletions project_euromir/solver_cg.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@
from project_euromir.lbfgs import minimize_lbfgs

if not NOHSDE:
from project_euromir.loss import (common_computation_main,
from project_euromir.loss import (_densify, common_computation_main,
create_workspace_main, gradient, hessian,
loss)
else:
from project_euromir.loss_no_hsde import (create_workspace, loss_gradient, hessian)
from project_euromir.loss_no_hsde import (create_workspace, loss_gradient, hessian, _densify)

from project_euromir.newton_cg import _epsilon, _minimize_newtoncg
from project_euromir.refinement import refine
Expand All @@ -52,14 +52,23 @@
if DEBUG:
import matplotlib.pyplot as plt

QR_PRESOLVE = False
QR_PRESOLVE = True
QR_PRESOLVE_AFTER = False
REFINE = True

CG_REGULARIZER = 0.

SCIPY_EXPERIMENTS = False

HESSIAN_COUNTER = {'ba': 0}

def solve(matrix, b, c, zero, nonneg, lbfgs_memory=10):
"Main function."

print(
f'PROBLEM SIZE: m={len(b)}, n={len(c)}, zero={zero},'
f' nonneg={nonneg}, nnz(matrix)={matrix.nnz}')

# some shape checking
n = len(c)
m = len(b)
Expand Down Expand Up @@ -131,7 +140,17 @@ def separated_grad(xy):

def separated_hessian(xy):
return hessian(
xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace)
xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace, regularizer=CG_REGULARIZER)

def dense_hessian(xy):
return _densify(hessian(
xy, m=m, n=n, zero=zero, matrix=matrix_transf, b=b_transf, c=c_transf, workspace=workspace, regularizer=1e-10))

def hessp(xy, var):
# breakpoint()
HESSIAN_COUNTER['ba'] += 1
H = separated_hessian(xy)
return H @ var

x_0 = np.zeros(n+m)

Expand All @@ -148,30 +167,71 @@ def callback(current):
# # current['x'] /= current.x[-1]
# old_x[:] = current.x

# import matplotlib.pyplot as plt
# all_losses = []
# def callback_plot(x):
# all_losses.append(separated_loss(x))

start = time.time()
# call newton-CG, implementation from scipy with modified termination
c_2 = 0.1
for i in range(5):
result = _minimize_newtoncg(
fun=separated_loss,
x0=x_0,
args=(),
jac=separated_grad,
hess=separated_hessian,
hessp=None,
# callback=callback,
xtol=0., #1e-5,
eps=_epsilon, # unused
maxiter=10000,
# max_cg_iters=100,
disp=99,
return_all=False,
c1=1e-4, c2=c_2)
if not SCIPY_EXPERIMENTS:
result = _minimize_newtoncg(
fun=separated_loss,
x0=x_0,
args=(),
jac=separated_grad,
hess=separated_hessian,
hessp=None,
# callback=callback,
xtol=0., #1e-5,
eps=_epsilon, # unused
maxiter=10000,
# max_cg_iters=100,
disp=99,
return_all=False,
c1=1e-4, c2=c_2)
print(result)
else:
# NO, trust region (at least scipy implementations) don't work as
# well, I tried a bunch, only realistic competitor is l-bfgs
#global HESSIAN_COUNTER
HESSIAN_COUNTER['ba'] = 0
result = sp.optimize.minimize(
fun=separated_loss,
x0=x_0,
args=(),
jac=separated_grad,
# hess=dense_hessian, # for dogleg
#callback=lambda x: print(separated_loss(x)),
callback=callback_plot,
hessp=hessp, # for trust-ncg
options = {
'initial_trust_radius': .01,
'max_trust_radius': .1,
'eta': 0., 'gtol': 1e-16,
'ftol': 0.,
'gtol': 0.,
'maxfun': 1e10,
'maxiter': 1e10,
'maxls': 100,
'maxcor': 20,
},
# method='dogleg',
# method='trust-ncg',
method='l-bfgs-b',
)
print("NUMBER HESSIAN MATMULS", HESSIAN_COUNTER['ba'])
print(result)
# plt.plot(all_losses)
# plt.yscale('log')
# plt.show()
# breakpoint()
print(f'NEWTON-CG took {time.time() - start:.3f} seconds')
loss_after_cg = separated_loss(result["x"])
print(f'LOSS {loss_after_cg:.2e}')
if loss_after_cg < 1e-20:
if loss_after_cg < 1e-25:
break
x_0 = result['x']
# breakpoint()
Expand Down

0 comments on commit b8cae2b

Please sign in to comment.