diff --git a/docs/index.html b/docs/index.html
index e85db7dd..10b00db7 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -46,7 +46,7 @@
Popular interpretable models
preds = model.predict(X_test) # discrete predictions: shape is (n_test, 1)
preds_proba = model.predict_proba(X_test) # predicted probabilities: shape is (n_test, n_classes)
-Install with pip install git+https://github.com/csinva/imodels
(see here for help). Contains the following models:
+Install with pip install imodels
(see here for help). Contains the following models:
@@ -139,7 +139,7 @@ References
Review on evaluating interpretability: doshi-velez & kim 2017, pdf
-Reference implementations (also linked above): the code here heavily derives from (and in some case is just a wrapper for) the wonderful work of previous projects. We seek to to extract out, combine, and maintain select relevant parts of these projects.
+- Reference implementations (also linked above): the code here heavily derives from the wonderful work of previous projects. We seek to extract out, unify, and maintain key parts of these projects.
- sklearn-expertsys - by @tmadl and @kenben based on original code by Ben Letham
- rulefit - by @christophM
- skope-rules - by the skope-rules team (including @ngoix, @floriangardin, @datajms, Bibi Ndiaye, Ronan Gautier)
diff --git a/docs/rule_list/bayesian_rule_list/bayesian_rule_list.html b/docs/rule_list/bayesian_rule_list/bayesian_rule_list.html
index 766b3e8a..76aa5720 100644
--- a/docs/rule_list/bayesian_rule_list/bayesian_rule_list.html
+++ b/docs/rule_list/bayesian_rule_list/bayesian_rule_list.html
@@ -178,21 +178,22 @@ Module imodels.rule_list.bayesian_rule_list.bayesian_rul
y = y.values
X, y = self._setdata(X, y, feature_labels, undiscretized_features)
-
permsdic = defaultdict(default_permsdic) # We will store here the MCMC results
-
data = list(X[:])
-
+
# Now find frequent itemsets
- # Mine separately for each class
- data_pos = [x for i, x in enumerate(data) if y[i] == 0]
- data_neg = [x for i, x in enumerate(data) if y[i] == 1]
- assert len(data_pos) + len(data_neg) == len(data)
- X_df = pd.DataFrame(X, columns=feature_labels)
- itemsets_df = fpgrowth(X_df, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+ onehot_features = X_df_onehot.columns
+
+ itemsets_df = fpgrowth(X_df_onehot, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
itemsets_indices = [tuple(s[1]) for s in itemsets_df.values]
- itemsets = [np.array(feature_labels)[list(inds)] for inds in itemsets_indices]
+ itemsets = [np.array(onehot_features)[list(inds)] for inds in itemsets_indices]
itemsets = list(map(tuple, itemsets))
if self.verbose:
print(len(itemsets), 'rules mined')
@@ -200,12 +201,13 @@ Module imodels.rule_list.bayesian_rule_list.bayesian_rul
# Now form the data-vs.-lhs set
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [{}] * (len(itemsets) + 1)
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(itemsets):
- X[j + 1] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j + 1] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
+
# now form lhs_len
@@ -290,15 +292,20 @@ Module imodels.rule_list.bayesian_rule_list.bayesian_rul
return "(Untrained RuleListClassifier)"
def _to_itemset_indices(self, data):
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=self.feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- X_df = pd.DataFrame(data, columns=self.feature_labels)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [set() for j in range(len(self.itemsets))]
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(self.itemsets):
if j > 0:
- X[j] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
return X
def predict_proba(self, X):
@@ -322,7 +329,7 @@ Module imodels.rule_list.bayesian_rule_list.bayesian_rul
if self.discretizer:
self.discretizer._data = pd.DataFrame(X, columns=self.feature_labels)
self.discretizer.apply_cutpoints()
- D = self._prepend_feature_labels(np.array(self.discretizer._data)[:, :-1])
+ D = self._prepend_feature_labels(np.array(self.discretizer._data))
else:
D = X
@@ -536,21 +543,22 @@ Parameters
y = y.values
X, y = self._setdata(X, y, feature_labels, undiscretized_features)
-
permsdic = defaultdict(default_permsdic) # We will store here the MCMC results
-
data = list(X[:])
-
+
# Now find frequent itemsets
- # Mine separately for each class
- data_pos = [x for i, x in enumerate(data) if y[i] == 0]
- data_neg = [x for i, x in enumerate(data) if y[i] == 1]
- assert len(data_pos) + len(data_neg) == len(data)
- X_df = pd.DataFrame(X, columns=feature_labels)
- itemsets_df = fpgrowth(X_df, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+ onehot_features = X_df_onehot.columns
+
+ itemsets_df = fpgrowth(X_df_onehot, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
itemsets_indices = [tuple(s[1]) for s in itemsets_df.values]
- itemsets = [np.array(feature_labels)[list(inds)] for inds in itemsets_indices]
+ itemsets = [np.array(onehot_features)[list(inds)] for inds in itemsets_indices]
itemsets = list(map(tuple, itemsets))
if self.verbose:
print(len(itemsets), 'rules mined')
@@ -558,12 +566,13 @@ Parameters
# Now form the data-vs.-lhs set
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [{}] * (len(itemsets) + 1)
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(itemsets):
- X[j + 1] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j + 1] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
+
# now form lhs_len
@@ -648,15 +657,20 @@ Parameters
return "(Untrained RuleListClassifier)"
def _to_itemset_indices(self, data):
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=self.feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- X_df = pd.DataFrame(data, columns=self.feature_labels)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [set() for j in range(len(self.itemsets))]
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(self.itemsets):
if j > 0:
- X[j] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
return X
def predict_proba(self, X):
@@ -680,7 +694,7 @@ Parameters
if self.discretizer:
self.discretizer._data = pd.DataFrame(X, columns=self.feature_labels)
self.discretizer.apply_cutpoints()
- D = self._prepend_feature_labels(np.array(self.discretizer._data)[:, :-1])
+ D = self._prepend_feature_labels(np.array(self.discretizer._data))
else:
D = X
@@ -808,21 +822,22 @@ Returns
y = y.values
X, y = self._setdata(X, y, feature_labels, undiscretized_features)
-
permsdic = defaultdict(default_permsdic) # We will store here the MCMC results
-
data = list(X[:])
-
+
# Now find frequent itemsets
- # Mine separately for each class
- data_pos = [x for i, x in enumerate(data) if y[i] == 0]
- data_neg = [x for i, x in enumerate(data) if y[i] == 1]
- assert len(data_pos) + len(data_neg) == len(data)
- X_df = pd.DataFrame(X, columns=feature_labels)
- itemsets_df = fpgrowth(X_df, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+ onehot_features = X_df_onehot.columns
+
+ itemsets_df = fpgrowth(X_df_onehot, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
itemsets_indices = [tuple(s[1]) for s in itemsets_df.values]
- itemsets = [np.array(feature_labels)[list(inds)] for inds in itemsets_indices]
+ itemsets = [np.array(onehot_features)[list(inds)] for inds in itemsets_indices]
itemsets = list(map(tuple, itemsets))
if self.verbose:
print(len(itemsets), 'rules mined')
@@ -830,12 +845,13 @@ Returns
# Now form the data-vs.-lhs set
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [{}] * (len(itemsets) + 1)
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(itemsets):
- X[j + 1] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j + 1] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
+
# now form lhs_len
@@ -953,7 +969,7 @@ Returns
if self.discretizer:
self.discretizer._data = pd.DataFrame(X, columns=self.feature_labels)
self.discretizer.apply_cutpoints()
- D = self._prepend_feature_labels(np.array(self.discretizer._data)[:, :-1])
+ D = self._prepend_feature_labels(np.array(self.discretizer._data))
else:
D = X
diff --git a/notebooks/1_model_based.ipynb b/notebooks/1_model_based.ipynb
index b6b7ab4f..d20bd1cb 100644
--- a/notebooks/1_model_based.ipynb
+++ b/notebooks/1_model_based.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 7,
"metadata": {
"pycharm": {
"is_executing": false
@@ -271,28 +271,32 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
- "name": "stdout",
+ "name": "stderr",
"output_type": "stream",
"text": [
- "training...\n"
+ "/accounts/projects/vision/.local/lib/python3.7/site-packages/sklearn/datasets/_openml.py:376: UserWarning: Multiple active versions of the dataset matching the name diabetes exist. Versions may be fundamentally different, returning version 1.\n",
+ " \" {version}.\".format(name=name, version=res[0]['version']))\n"
]
},
{
- "ename": "ValueError",
- "evalue": "The allowed values for a DataFrame are True, False, 0, 1. Found value #Pregnant : 6.5_to_inf",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'training...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBayesianRuleListClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmax_iter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclass1label\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"diabetes\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeature_labels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeature_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0mpreds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"RuleListClassifier Accuracy:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mpreds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Learned interpretable model:\\n\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/chandan/imodels/imodels/rule_list/bayesian_rule_list/bayesian_rule_list.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, feature_labels, undiscretized_features, verbose)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0mX_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeature_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 167\u001b[0;31m \u001b[0mitemsets_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfpgrowth\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_support\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mminsupport\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_len\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaxcardinality\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 168\u001b[0m \u001b[0mitemsets_indices\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0ms\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mitemsets_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[0mitemsets\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeature_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0minds\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mitemsets_indices\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/.local/lib/python3.7/site-packages/mlxtend/frequent_patterns/fpgrowth.py\u001b[0m in \u001b[0;36mfpgrowth\u001b[0;34m(df, min_support, use_colnames, max_len, verbose)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \"\"\"\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mfpc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalid_input_check\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmin_support\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0.\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/.local/lib/python3.7/site-packages/mlxtend/frequent_patterns/fpcommon.py\u001b[0m in \u001b[0;36mvalid_input_check\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 114\u001b[0m s = ('The allowed values for a DataFrame'\n\u001b[1;32m 115\u001b[0m ' are True, False, 0, 1. Found value %s' % (val))\n\u001b[0;32m--> 116\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: The allowed values for a DataFrame are True, False, 0, 1. Found value #Pregnant : 6.5_to_inf"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "training...\n",
+ "RuleListClassifier Accuracy: 0.671875 Learned interpretable model:\n",
+ " Trained RuleListClassifier for detecting diabetes\n",
+ "==================================================\n",
+ "IF #Pregnant_-inf_to_6.5 AND Glucose concentration test_-inf_to_122.5 THEN probability of diabetes: 9.9% (4.9%-16.4%)\n",
+ "ELSE IF Body mass index_30.9_to_inf THEN probability of diabetes: 69.7% (58.2%-80.1%)\n",
+ "ELSE IF Triceps skin fold thickness(mm)_All THEN probability of diabetes: 38.7% (22.7%-56.1%)\n",
+ "ELSE IF Age (years)_26.5_to_inf THEN probability of diabetes: 50.0% (2.5%-97.5%)\n",
+ "ELSE probability of diabetes: 50.0% (2.5%-97.5%)\n",
+ "=================================================\n",
+ "\n"
]
}
],
diff --git a/readme.md b/readme.md
index 682f7953..2c684365 100644
--- a/readme.md
+++ b/readme.md
@@ -30,7 +30,7 @@ preds = model.predict(X_test) # discrete predictions: shape is (n_test, 1)
preds_proba = model.predict_proba(X_test) # predicted probabilities: shape is (n_test, n_classes)
```
-Install with `pip install git+https://github.com/csinva/imodels` (see [here](https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md) for help). Contains the following models:
+Install with `pip install imodels` (see [here](https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md) for help). Contains the following models:
| Model | Reference | Description |
| :--------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
@@ -70,7 +70,7 @@ Demos are contained in the [notebooks](notebooks) folder.
- Interpretable ML book: molnar 2019, [pdf](https://christophm.github.io/interpretable-ml-book/)
- Case for interpretable models rather than post-hoc explanation: rudin 2019, [pdf](https://arxiv.org/pdf/1811.10154.pdf)
- Review on evaluating interpretability: doshi-velez & kim 2017, [pdf](https://arxiv.org/pdf/1702.08608.pdf)
-- Reference implementations (also linked above): the code here heavily derives from (and in some case is just a wrapper for) the wonderful work of previous projects. We seek to to extract out, combine, and maintain select relevant parts of these projects.
+- Reference implementations (also linked above): the code here heavily derives from the wonderful work of previous projects. We seek to extract out, unify, and maintain key parts of these projects.
- [sklearn-expertsys](https://github.com/tmadl/sklearn-expertsys) - by [@tmadl](https://github.com/tmadl) and [@kenben](https://github.com/kenben) based on original code by [Ben Letham](http://lethalletham.com/)
- [rulefit](https://github.com/christophM/rulefit) - by [@christophM](https://github.com/christophM)
- [skope-rules](https://github.com/scikit-learn-contrib/skope-rules) - by the [skope-rules team](https://github.com/scikit-learn-contrib/skope-rules/blob/master/AUTHORS.rst) (including [@ngoix](https://github.com/ngoix), [@floriangardin](https://github.com/floriangardin), [@datajms](https://github.com/datajms), [Bibi Ndiaye](), [Ronan Gautier]())
diff --git a/setup.py b/setup.py
index 0d1fd457..ae78dcee 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
setuptools.setup(
name="imodels",
- version="0.2.3",
+ version="0.2.4",
author="Chandan Singh",
author_email="chandan_singh@berkeley.edu",
description="Implementations of various interpretable models",