Hyperoptimizing with Optuna #1338

Merged
17 commits merged on Sep 18, 2024
111 changes: 58 additions & 53 deletions examples/dbi/dbi_tutorial_basic_intro.ipynb

Large diffs are not rendered by default.

1,378 changes: 801 additions & 577 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions pyproject.toml
@@ -26,9 +26,7 @@ scipy = "^1.10.1"
sympy = "^1.11.1"
cma = "^3.3.0"
joblib = "^1.2.0"
hyperopt = "^0.2.7"
# `setuptools` is only required because undeclared by `hyperopt`
setuptools = ">=69.1.1,<71.0.0"
optuna = "^4.0.0"
tabulate = "^0.9.0"
openqasm3 = { version = ">=0.5.0", extras = ["parser"] }
numpy = "^1.26.4"
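For context on the dependency swap above, here is a minimal sketch (not taken from this diff) of how the `hyperopt.fmin` + `hp.loguniform` pattern maps onto Optuna's API; the toy objective and bounds are illustrative only.

```python
import optuna


def objective(trial):
    # log-uniform sample, the Optuna counterpart of hyperopt.hp.loguniform
    lr = trial.suggest_float("lr", 1e-5, 1.0, log=True)
    return (lr - 1e-2) ** 2  # toy loss standing in for the DBI cost


# TPESampler plays the role of hyperopt.tpe
study = optuna.create_study(
    direction="minimize", sampler=optuna.samplers.TPESampler()
)
study.optimize(objective, n_trials=100)
best_lr = study.best_params["lr"]
```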
3 changes: 2 additions & 1 deletion src/qibo/models/dbi/double_bracket.py
@@ -3,6 +3,7 @@
from typing import Optional

import numpy as np
import optuna

from qibo.config import raise_error
from qibo.hamiltonians import Hamiltonian
@@ -55,7 +56,7 @@ class DoubleBracketScheduling(Enum):
"""Define the DBI scheduling strategies."""

hyperopt = hyperopt_step
"""Use hyperopt package."""
"""Use optuna package to hyperoptimize the DBI step."""
grid_search = grid_search_step
"""Use greedy grid search."""
polynomial_approximation = polynomial_step
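As a usage sketch, adapted from the example that this PR removes from the `gradient_descent` docstring further down: the scheduling strategy is still selected the same way, only the optimizer behind it changes. Treat the exact imports and call signatures as assumptions about the current module layout.

```python
from qibo import set_backend
from qibo.hamiltonians import Hamiltonian
from qibo.models.dbi.double_bracket import *
from qibo.quantum_info import random_hermitian

nqubits = 3
set_backend("numpy")
h0 = random_hermitian(2**nqubits)
dbi = DoubleBracketIteration(
    Hamiltonian(nqubits, h0),
    mode=DoubleBracketGeneratorType.single_commutator,
    scheduling=DoubleBracketScheduling.hyperopt,  # now backed by an Optuna study
    cost=DoubleBracketCostFunction.off_diagonal_norm,
)
# choose_step runs the Optuna-based hyperopt_step for the selected scheduling
s = dbi.choose_step(d=dbi.diagonal_h_matrix)
dbi(step=s, d=dbi.diagonal_h_matrix)
```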
84 changes: 20 additions & 64 deletions src/qibo/models/dbi/utils_dbr_strategies.py
@@ -1,4 +1,4 @@
import hyperopt
import optuna

from qibo.backends import _check_backend
from qibo.models.dbi.double_bracket import *
@@ -155,7 +155,7 @@ def gradient_descent(
lr_max: float = 1,
max_evals: int = 100,
space: callable = None,
optimizer: callable = hyperopt.tpe,
optimizer: optuna.samplers.BaseSampler = optuna.samplers.TPESampler(),
verbose: bool = False,
backend=None,
):
@@ -173,61 +173,26 @@ def gradient_descent(
normalize (bool, optional): option to normalize the diagonal operator. Defaults to False.
lr_min (float, optional): the minimal gradient step. Defaults to 1e-5.
lr_max (float, optional): the maximal gradient step. Defaults to 1.
max_evals (int, optional): maximum number of evaluations for `lr` using `hyperopt`. Defaults to 100.
space (callable, optional): evalutation space for `hyperopt`. Defaults to None.
optimizer (callable, optional): optimizer option for `hyperopt`. Defaults to `hyperopt.tpe`.
verbose (bool, optional): option for printing `hyperopt` process. Defaults to False.
max_evals (int, optional): maximum number of evaluations for `lr` using `optuna`. Defaults to 100.
space (callable, optional): evaluation space for `optuna`. Defaults to None.
optimizer (optuna.samplers.BaseSampler, optional): Optuna sampler used for the search. Defaults to `TPESampler()`.
verbose (bool, optional): option for printing the `optuna` optimization process. Defaults to False.

Returns:
loss_hist (list): list of history losses of `dbi_object` throughout the double bracket rotations.
d_params_hist (list): list of history of `d` parameters after gradient descent.
s_hist (list): list of history of optimal `s` found.
Example:
from qibo import set_backend
from qibo.hamiltonians import Hamiltonian
from qibo.models.dbi.double_bracket import *
from qibo.models.dbi.utils import *
from qibo.models.dbi.utils_dbr_strategies import gradient_descent
from qibo.quantum_info import random_hermitian

nqubits = 3
NSTEPS = 5
set_backend("numpy")
h0 = random_hermitian(2**nqubits)
dbi = DoubleBracketIteration(
Hamiltonian(nqubits, h0),
mode=DoubleBracketGeneratorType.single_commutator,
scheduling=DoubleBracketScheduling.hyperopt,
cost=DoubleBracketCostFunction.off_diagonal_norm,
)
initial_off_diagonal_norm = dbi.off_diagonal_norm
pauli_operator_dict = generate_pauli_operator_dict(
nqubits, parameterization_order=1
)
pauli_operators = list(pauli_operator_dict.values())
# let initial d be approximation of $\Delta(H)
d_coef_pauli = decompose_into_Pauli_basis(
dbi.diagonal_h_matrix, pauli_operators=pauli_operators
)
d_pauli = sum([d_coef_pauli[i] * pauli_operators[i] for i in range(nqubits)])
loss_hist_pauli, d_params_hist_pauli, s_hist_pauli = gradient_descent(
dbi,
NSTEPS,
d_coef_pauli,
ParameterizationTypes.pauli,
pauli_operator_dict=pauli_operator_dict,
)
"""
backend = _check_backend(backend)

nqubits = dbi_object.nqubits
# TODO: write tests where this condition applies
if (
parameterization is ParameterizationTypes.pauli and pauli_operator_dict is None
): # pragma: no cover
pauli_operator_dict = generate_pauli_operator_dict(
nqubits=nqubits, parameterization_order=pauli_parameterization_order
)

d = params_to_diagonal_operator(
d_params,
nqubits,
@@ -236,21 +201,14 @@ def gradient_descent(
normalize=normalize,
backend=backend,
)

loss_hist = [dbi_object.loss(0.0, d=d)]
d_params_hist = [d_params]
s_hist = [0]
# TODO: write tests where this condition applies
if (
parameterization is ParameterizationTypes.pauli and pauli_operator_dict is None
): # pragma: no cover
pauli_operator_dict = generate_pauli_operator_dict(
nqubits=nqubits,
parameterization_order=pauli_parameterization_order,
backend=backend,
)
# first step

s = dbi_object.choose_step(d=d)
dbi_object(step=s, d=d)

for _ in range(iterations):
grad = gradient_numerical(
dbi_object,
@@ -262,8 +220,8 @@ def gradient_descent(
backend=backend,
)

# set up hyperopt to find optimal lr
def func_loss_to_lr(lr):
def func_loss_to_lr(trial):
lr = trial.suggest_float("lr", lr_min, lr_max, log=True)  # log-uniform sample of the learning rate
d_params_eval = [d_params[j] - grad[j] * lr for j in range(len(grad))]
d_eval = params_to_diagonal_operator(
d_params_eval,
@@ -275,17 +233,14 @@ def func_loss_to_lr(lr):
)
return dbi_object.loss(step=s, d=d_eval)

if space is None:
space = hyperopt.hp.loguniform("lr", np.log(lr_min), np.log(lr_max))
# create a study using the specified optimizer (sampler)
study = optuna.create_study(sampler=optimizer, direction="minimize")

best = hyperopt.fmin(
fn=func_loss_to_lr,
space=space,
algo=optimizer.suggest,
max_evals=max_evals,
verbose=verbose,
)
lr = best["lr"]
# optimize the function
study.optimize(func_loss_to_lr, n_trials=max_evals)

# get the best learning rate
lr = study.best_params["lr"]

d_params = [d_params[j] - grad[j] * lr for j in range(len(grad))]
d = params_to_diagonal_operator(
@@ -303,4 +258,5 @@ def func_loss_to_lr(lr):
loss_hist.append(dbi_object.loss(0.0, d=d))
d_params_hist.append(d_params)
s_hist.append(s)

return loss_hist, d_params_hist, s_hist
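For completeness, the usage example removed from the `gradient_descent` docstring above, lightly adapted as a hedged sketch so it reads correctly with the Optuna-based defaults (not part of the diff itself):

```python
from qibo import set_backend
from qibo.hamiltonians import Hamiltonian
from qibo.models.dbi.double_bracket import *
from qibo.models.dbi.utils import *
from qibo.models.dbi.utils_dbr_strategies import gradient_descent
from qibo.quantum_info import random_hermitian

nqubits = 3
NSTEPS = 5
set_backend("numpy")
h0 = random_hermitian(2**nqubits)
dbi = DoubleBracketIteration(
    Hamiltonian(nqubits, h0),
    mode=DoubleBracketGeneratorType.single_commutator,
    scheduling=DoubleBracketScheduling.hyperopt,
    cost=DoubleBracketCostFunction.off_diagonal_norm,
)
pauli_operator_dict = generate_pauli_operator_dict(
    nqubits, parameterization_order=1
)
pauli_operators = list(pauli_operator_dict.values())
# let the initial d be an approximation of Delta(H)
d_coef_pauli = decompose_into_Pauli_basis(
    dbi.diagonal_h_matrix, pauli_operators=pauli_operators
)
# the learning rate at each iteration is tuned by an Optuna TPE study
loss_hist, d_params_hist, s_hist = gradient_descent(
    dbi,
    NSTEPS,
    d_coef_pauli,
    ParameterizationTypes.pauli,
    pauli_operator_dict=pauli_operator_dict,
)
```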
51 changes: 24 additions & 27 deletions src/qibo/models/dbi/utils_scheduling.py
@@ -1,9 +1,8 @@
import math
from functools import partial
from typing import Optional

import hyperopt
import numpy as np
import optuna

error = 1e-3

@@ -41,47 +40,45 @@ def grid_search_step(


def hyperopt_step(
dbi_object,
self,
step_min: float = 1e-5,
step_max: float = 1,
max_evals: int = 100,
space: callable = None,
optimizer: callable = None,
max_evals: int = 1000,
look_ahead: int = 1,
d: Optional[np.array] = None,
verbose: bool = False,
d: np.array = None,
optimizer: optuna.samplers.BaseSampler = None,
):
"""
Optimize iteration step using hyperopt.
Optimize iteration step using Optuna.

Args:
step_min: lower bound of the search grid;
step_max: upper bound of the search grid;
max_evals: maximum number of iterations done by the hyperoptimizer;
space: see hyperopt.hp possibilities;
optimizer: see hyperopt algorithms;
max_evals: maximum number of trials done by the optimizer;
look_ahead: number of iteration steps to compute the loss function;
d: diagonal operator for generating double-bracket iterations.
verbose: level of verbosity;
d: diagonal operator for generating double-bracket iterations;
optimizer: Optuna sampler for the search algorithm (e.g.,
optuna.samplers.TPESampler()).
See: https://optuna.readthedocs.io/en/stable/reference/samplers/index.html

Returns:
(float): optimized best iteration step (minimizing loss function).
(float): optimized best iteration step.
"""
if space is None:
space = hyperopt.hp.uniform
optuna.logging.set_verbosity(optuna.logging.WARNING)

def objective(trial):
step = trial.suggest_float("step", step_min, step_max)
return self.loss(step, d=d, look_ahead=look_ahead)

if optimizer is None:
optimizer = hyperopt.tpe
if d is None:
d = dbi_object.diagonal_h_matrix
optimizer = optuna.samplers.TPESampler()

space = space("step", step_min, step_max)
study = optuna.create_study(direction="minimize", sampler=optimizer)
study.optimize(objective, n_trials=max_evals, show_progress_bar=verbose)

best = hyperopt.fmin(
fn=partial(dbi_object.loss, d=d, look_ahead=look_ahead),
space=space,
algo=optimizer.suggest,
max_evals=max_evals,
show_progressbar=False,
)
return best["step"]
return study.best_params["step"]
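The study pattern above, shown in isolation as a self-contained sketch (the quadratic loss is only a stand-in for `self.loss`); it also shows how a seeded sampler could be passed through the `optimizer` argument when reproducible steps are needed.

```python
import optuna

optuna.logging.set_verbosity(optuna.logging.WARNING)


def objective(trial):
    step = trial.suggest_float("step", 1e-5, 1.0)
    return (step - 0.2) ** 2  # stand-in for self.loss(step, d=d, look_ahead=look_ahead)


# a seeded TPESampler makes the selected step reproducible across runs
sampler = optuna.samplers.TPESampler(seed=42)
study = optuna.create_study(direction="minimize", sampler=sampler)
study.optimize(objective, n_trials=100, show_progress_bar=False)
best_step = study.best_params["step"]
```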


def polynomial_step(