GH-16312: fix wrong error raised by duplicated/conflicted constraints.
wendycwong committed Jul 1, 2024
1 parent 64a7c5d commit fc5ac52
Showing 5 changed files with 424 additions and 13 deletions.
24 changes: 14 additions & 10 deletions h2o-algos/src/main/java/hex/glm/ConstrainedGLMUtils.java
@@ -137,22 +137,27 @@ public static int[] extractBetaConstraints(ComputationState state, String[] coef
List<LinearConstraints> equalityC = new ArrayList<>();
List<LinearConstraints> lessThanEqualToC = new ArrayList<>();
List<Integer> betaIndexOnOff = new ArrayList<>();
+ boolean bothEndsPresent = (betaC._betaUB != null) && (betaC._betaLB != null);
+ boolean lowerEndPresentOnly = (betaC._betaUB == null) && (betaC._betaLB != null);
+ boolean upperEndPresentOnly = (betaC._betaUB != null) && (betaC._betaLB == null);
if (betaC._betaLB != null) {
- int numCons = betaC._betaLB.length-1;
- for (int index=0; index<numCons; index++) {
- if (!Double.isInfinite(betaC._betaUB[index]) && (betaC._betaLB[index] == betaC._betaUB[index])) { // equality constraint
+ int numCons = betaC._betaLB.length - 1;
+ for (int index = 0; index < numCons; index++) {
+ if (bothEndsPresent && !Double.isInfinite(betaC._betaUB[index]) && (betaC._betaLB[index] == betaC._betaUB[index])) { // equality constraint
addBCEqualityConstraint(equalityC, betaC, coefNames, index);
betaIndexOnOff.add(1);
- } else if (!Double.isInfinite(betaC._betaUB[index]) && !Double.isInfinite(betaC._betaLB[index]) &&
+ } else if (bothEndsPresent && !Double.isInfinite(betaC._betaUB[index]) && !Double.isInfinite(betaC._betaLB[index]) &&
(betaC._betaLB[index] < betaC._betaUB[index])) { // low < beta < high, generate two lessThanEqualTo constraints
addBCGreaterThanConstraint(lessThanEqualToC, betaC, coefNames, index);
addBCLessThanConstraint(lessThanEqualToC, betaC, coefNames, index);
betaIndexOnOff.add(1);
betaIndexOnOff.add(0);
- } else if (Double.isInfinite(betaC._betaUB[index]) && !Double.isInfinite(betaC._betaLB[index])) { // low < beta < infinity
+ } else if ((lowerEndPresentOnly || (betaC._betaUB != null && Double.isInfinite(betaC._betaUB[index]))) &&
+ betaC._betaLB != null && !Double.isInfinite(betaC._betaLB[index])) { // low < beta < infinity
addBCGreaterThanConstraint(lessThanEqualToC, betaC, coefNames, index);
betaIndexOnOff.add(1);
- } else if (!Double.isInfinite(betaC._betaUB[index]) && Double.isInfinite(betaC._betaLB[index])) { // -infinity < beta < high
+ } else if ((upperEndPresentOnly || (betaC._betaLB != null && Double.isInfinite(betaC._betaLB[index]))) &&
+ betaC._betaUB != null && !Double.isInfinite(betaC._betaUB[index])) { // -infinity < beta < high
addBCLessThanConstraint(lessThanEqualToC, betaC, coefNames, index);
betaIndexOnOff.add(1);
}
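
The three new flags exist so that a missing bound array is never dereferenced: the old branches read betaC._betaUB[index] even when _betaUB was null, which could blow up for one-sided beta constraints. A minimal Python sketch of the classification the patched branches perform, using a hypothetical helper name, with None standing in for an absent _betaLB/_betaUB array:

import math

def classify_beta_bound(lb, ub):
    # A missing array (None here) means "no bound on that side" and is never dereferenced.
    has_lb = lb is not None and not math.isinf(lb)
    has_ub = ub is not None and not math.isinf(ub)
    if has_lb and has_ub and lb == ub:
        return "equality"      # lb == beta == ub
    if has_lb and has_ub and lb < ub:
        return "two-sided"     # lb <= beta <= ub, emitted as two <= constraints
    if has_lb:
        return "lower-only"    # lb <= beta < +infinity
    if has_ub:
        return "upper-only"    # -infinity < beta <= ub
    return "unbounded"         # nothing to add for this coefficient

# Only a lower bound supplied: the pre-fix code would have read the missing upper-bound array here.
print(classify_beta_bound(0.0, None))   # lower-only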
@@ -506,11 +511,10 @@ public static boolean extractCoeffNames(List<String> coeffList, LinearConstraint

public static List<String> foundRedundantConstraints(ComputationState state, final double[][] initConstraintMatrix) {
Matrix constMatrix = new Matrix(initConstraintMatrix);
- Matrix constMatrixLessConstant = constMatrix.getMatrix(0, constMatrix.getRowDimension() -1, 1, constMatrix.getColumnDimension()-1);
- Matrix constMatrixTConstMatrix = constMatrixLessConstant.times(constMatrixLessConstant.transpose());
- int rank = constMatrixLessConstant.rank();
+ Matrix matrixSquare = constMatrix.times(constMatrix.transpose());
+ int rank = matrixSquare.rank();
if (rank < constMatrix.getRowDimension()) { // redundant constraints are specified
- double[][] rMatVal = constMatrixTConstMatrix.qr().getR().getArray();
+ double[][] rMatVal = matrixSquare.qr().getR().getArray();
List<Double> diag = IntStream.range(0, rMatVal.length).mapToDouble(x->Math.abs(rMatVal[x][x])).boxed().collect(Collectors.toList());
int[] sortedIndices = IntStream.range(0, diag.size()).boxed().sorted((i, j) -> diag.get(i).compareTo(diag.get(j))).mapToInt(ele->ele).toArray();
List<Integer> duplicatedEleIndice = IntStream.range(0, diag.size()-rank).map(x -> sortedIndices[x]).boxed().collect(Collectors.toList());
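The rewritten check drops the constant-column slice and works with the full constraint matrix: if the rank of A·Aᵀ is smaller than the number of constraint rows, some rows are linear combinations of others, and the rows matching the smallest diagonal entries of R in a QR factorization of A·Aᵀ are the ones reported as redundant. A small numpy sketch of the same idea (an illustration, not the Jama-based code above):

import numpy as np

def find_redundant_rows(A):
    # Rank deficiency of A @ A.T means some constraint rows are linear combinations
    # of others; the smallest |R[i, i]| values in its QR factorization point at them.
    M = A @ A.T
    rank = np.linalg.matrix_rank(M)
    if rank >= A.shape[0]:
        return []                      # all constraints are independent
    _, R = np.linalg.qr(M)
    diag = np.abs(np.diag(R))
    order = np.argsort(diag)           # smallest pivots first
    return sorted(order[: A.shape[0] - rank].tolist())

# Two identical constraint rows: one of them is flagged as redundant.
A = np.array([[1.0, -1.0, 0.0],
              [1.0, -1.0, 0.0]])
print(find_redundant_rows(A))          # [1]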
7 changes: 4 additions & 3 deletions h2o-algos/src/main/java/hex/glm/GLM.java
@@ -1403,7 +1403,8 @@ void checkInitLinearConstraints() {
}
// no regularization for constrainted GLM except during testing
if ((notZeroLambdas(_parms._lambda) || _parms._lambda_search) && !_parms._testCSZeroGram) {
- error("lambda or lambda_search", "Regularization is not allowed for constrained GLM.");
+ error("lambda or lambda_search", "Regularization is not allowed for constrained GLM. Set" +
+ " lambda to 0.0.");
return;
}
if ("multinomial".equals(_parms._solver) || "ordinal".equals(_parms._solver)) {
@@ -2353,7 +2354,7 @@ private void fitIRLSM(Solver s) {
* hereby use the word the doc to refere to this document. In particular, we following the algorithm described in
* Section VII (and table titled Algorithm 19.1) of the doc. Not as good as when considering magnitude of gradient.
*/
- private void fitIRLSMCS() {
+ private void fitIRLSMCS9() {
double[] betaCnd = _checkPointFirstIter ? _model._betaCndCheckpoint : _state.beta();
double[] tempBeta = _parms._separate_linear_beta ? new double[betaCnd.length] : null;
List<String> coefNames = Arrays.stream(_state.activeData()._coefNames).collect(Collectors.toList());
@@ -2646,7 +2647,7 @@ private void fitIRLSMCS8() {
// original implementation but will not quit when magnitude of gradient is small. If exit condition is triggered
// (either ls failed or no progress is made, if the magnitude of gradient is small, we will exit thw while loop
// but will arrive at the part to change the constrained parameters. This seems to help.
- private void fitIRLSMCS4() {
+ private void fitIRLSMCS() {
double[] betaCnd = _checkPointFirstIter ? _model._betaCndCheckpoint : _state.beta();
double[] tempBeta = _parms._separate_linear_beta ? new double[betaCnd.length] : null;
List<String> coefNames = Arrays.stream(_state.activeData()._coefNames).collect(Collectors.toList());
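The comment above the renamed fitIRLSMCS describes the behavior being promoted here: a failed line search, lack of progress, or a small gradient only ends the inner solve; the constrained parameters are still updated afterwards, and only then does the outer loop decide whether to stop. A toy, runnable Python illustration of that loop shape (my own quadratic-penalty example, not the GLM.java implementation):

def toy_constrained_fit(rho=1.0, outer_iters=20, eps=1e-6):
    # Minimize (x - 3)^2 subject to x <= 1 using a quadratic penalty rho * max(x - 1, 0)^2.
    x = 0.0
    for _ in range(outer_iters):
        for _ in range(200):                      # inner unconstrained iterations
            grad = 2.0 * (x - 3.0) + 2.0 * rho * max(x - 1.0, 0.0)
            if abs(grad) < eps:
                break                             # small gradient only exits the inner loop
            x -= 0.1 / (1.0 + rho) * grad         # damped step keeps the iteration stable
        rho *= 10.0                               # the constraint parameter is still updated
        if max(x - 1.0, 0.0) < 1e-8:              # stop once the constraint is (nearly) satisfied
            break
    return x

print(toy_constrained_fit())   # approaches the constrained optimum x = 1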
@@ -0,0 +1,144 @@
import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator as glm
from tests import pyunit_utils
import numpy as np
import pandas as pd

# No need to check anything; this test just needs to run to completion with duplicated/conflicting constraints.
def data_prep(seed):
np.random.seed(seed)
x1 = np.random.normal(0, 10, 100000)
x2 = np.random.normal(10, 100 , 100000)
x3 = np.random.normal(20, 200, 100000)
x4 = np.random.normal(30, 3000, 100000)
x5 = np.random.normal(400, 4000, 100000)

y_raw = np.sin(x1)*100 + np.sin(x2)*100 + x3/20 + x3/30 + x5/400
y = np.random.normal(y_raw, 20)

data = {
'x1': x1,
'x2': x2,
'x3': x3,
'x4': x4,
'x5': x5,
'y': y,
}
return h2o.H2OFrame(pd.DataFrame(data))

def test_bad_lambda_specification():
train_data = data_prep(123)
family = 'gaussian'
link = 'identity'
nfolds = 0
lambda_ = 0.0
seed = 1234
calc_like = True
compute_p_values = True
solver = 'irlsm'
predictors = ['x1', 'x2', 'x3', 'x4', 'x5']
response = "y"

linear_constraints2 = []

name = "x2"
values = 1
types = "LessThanEqual"
contraint_numbers = 0
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x3"
values = -1
types = "LessThanEqual"
contraint_numbers = 0
linear_constraints2.append([name, values, types, contraint_numbers])

name = "constant"
values = 0
types = "LessThanEqual"
contraint_numbers = 0
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x3"
values = 1
types = "LessThanEqual"
contraint_numbers = 1
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x4"
values = -1
types = "LessThanEqual"
contraint_numbers = 1
linear_constraints2.append([name, values, types, contraint_numbers])

name = "constant"
values = 0
types = "LessThanEqual"
contraint_numbers = 1
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x2"
values = 1
types = "LessThanEqual"
contraint_numbers = 2
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x3"
values = 1
types = "LessThanEqual"
contraint_numbers = 2
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x4"
values = 1
types = "LessThanEqual"
contraint_numbers = 2
linear_constraints2.append([name, values, types, contraint_numbers])

name = "constant"
values = 0
types = "LessThanEqual"
contraint_numbers = 2
linear_constraints2.append([name, values, types, contraint_numbers])

linear_constraints = h2o.H2OFrame(linear_constraints2)
linear_constraints.set_names(["names", "values", "types", "constraint_numbers"])
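# For reference, the ten constraint rows above could be built in one go; a hedged
# equivalent of the same frame (same rows and column names), kept commented out so
# the frame is not rebuilt. Read as H2O linear constraints, the three groups encode
# x2 - x3 <= 0, x3 - x4 <= 0, and x2 + x3 + x4 <= 0.
# rows = [
#     ["x2", 1, "LessThanEqual", 0], ["x3", -1, "LessThanEqual", 0], ["constant", 0, "LessThanEqual", 0],
#     ["x3", 1, "LessThanEqual", 1], ["x4", -1, "LessThanEqual", 1], ["constant", 0, "LessThanEqual", 1],
#     ["x2", 1, "LessThanEqual", 2], ["x3", 1, "LessThanEqual", 2], ["x4", 1, "LessThanEqual", 2],
#     ["constant", 0, "LessThanEqual", 2],
# ]
# linear_constraints = h2o.H2OFrame(rows)
# linear_constraints.set_names(["names", "values", "types", "constraint_numbers"])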

params = {
"family" : family,
"link": link,
"lambda_" : lambda_,
"seed" : seed,
"nfolds" : nfolds,
"compute_p_values" : compute_p_values,
"calc_like" : calc_like,
"solver" : solver,
"linear_constraints": linear_constraints
}

model = glm(**params)
model.train(x = predictors, y = response, training_frame = train_data)
print(model.coef())
coef_constrained = model.coef()
print(glm.getConstraintsInfo(model))

params = {
"family" : family,
"link": link,
"lambda_" : lambda_,
"seed" : seed,
"nfolds" : nfolds,
"compute_p_values" : compute_p_values,
"calc_like" : calc_like,
"solver" : solver,
}

model_no_constraints = glm(**params)
model_no_constraints.train(x = predictors, y = response, training_frame = train_data)
coef_no_constraints = model_no_constraints.coef()
print(coef_no_constraints)

if __name__ == "__main__":
pyunit_utils.standalone_test(test_bad_lambda_specification)
else:
test_bad_lambda_specification()
@@ -0,0 +1,131 @@
import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator as glm
from tests import pyunit_utils
import numpy as np
import pandas as pd

# No need to check anything; this test just needs to run to completion without an NPE.
def data_prep(seed):
np.random.seed(seed)
x1 = np.random.normal(0, 10, 100000)
x2 = np.random.normal(10, 100 , 100000)
x3 = np.random.normal(20, 200, 100000)
x4 = np.random.normal(30, 3000, 100000)
x5 = np.random.normal(400, 4000, 100000)

y_raw = np.sin(x1)*100 + np.sin(x2)*100 + x3/20 + x3/30 + x5/400
y = np.random.normal(y_raw, 20)

data = {
'x1': x1,
'x2': x2,
'x3': x3,
'x4': x4,
'x5': x5,
'y': y,
}
return h2o.H2OFrame(pd.DataFrame(data))

def test_bad_lambda_specification():
train_data = data_prep(123)
family = 'gaussian'
link = 'identity'
nfolds = 0
lambda_ = 0.0
seed = 1234
calc_like = True
compute_p_values = True
solver = 'irlsm'
predictors = ['x1', 'x2', 'x3', 'x4', 'x5']
response = "y"

linear_constraints2 = []

name = "x2"
values = 1
types = "LessThanEqual"
contraint_numbers = 0
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x3"
values = -1
types = "LessThanEqual"
contraint_numbers = 0
linear_constraints2.append([name, values, types, contraint_numbers])

name = "constant"
values = 0
types = "LessThanEqual"
contraint_numbers = 0
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x3"
values = 1
types = "LessThanEqual"
contraint_numbers = 1
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x4"
values = -1
types = "LessThanEqual"
contraint_numbers = 1
linear_constraints2.append([name, values, types, contraint_numbers])

name = "constant"
values = 0
types = "LessThanEqual"
contraint_numbers = 1
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x2"
values = 1
types = "LessThanEqual"
contraint_numbers = 2
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x3"
values = 1
types = "LessThanEqual"
contraint_numbers = 2
linear_constraints2.append([name, values, types, contraint_numbers])

name = "x4"
values = 1
types = "LessThanEqual"
contraint_numbers = 2
linear_constraints2.append([name, values, types, contraint_numbers])

name = "constant"
values = 0
types = "LessThanEqual"
contraint_numbers = 2
linear_constraints2.append([name, values, types, contraint_numbers])


linear_constraints = h2o.H2OFrame(linear_constraints2)
linear_constraints.set_names(["names", "values", "types", "constraint_numbers"])

# check lower bound of beta constraint will not generate error but lambda will.
params = {
"family" : family,
"link": link,
"lambda_" : lambda_,
"seed" : seed,
"nfolds" : nfolds,
"compute_p_values" : compute_p_values,
"calc_like" : calc_like,
"solver" : solver,
"linear_constraints": linear_constraints
}

model = glm(**params)
model.train(x = predictors, y = response, training_frame = train_data)
print(model.coef())


if __name__ == "__main__":
pyunit_utils.standalone_test(test_bad_lambda_specification)
else:
test_bad_lambda_specification()