diff --git a/docs/index.html b/docs/index.html
index e85db7dd..10b00db7 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -46,7 +46,7 @@
Popular interpretable models
preds = model.predict(X_test) # discrete predictions: shape is (n_test, 1)
preds_proba = model.predict_proba(X_test) # predicted probabilities: shape is (n_test, n_classes)
-Install with pip install git+https://github.com/csinva/imodels
(see here for help). Contains the following models:
+Install with pip install imodels
(see here for help). Contains the following models:
@@ -139,7 +139,7 @@ References
Review on evaluating interpretability: doshi-velez & kim 2017, pdf
-Reference implementations (also linked above): the code here heavily derives from (and in some case is just a wrapper for) the wonderful work of previous projects. We seek to to extract out, combine, and maintain select relevant parts of these projects.
+- Reference implementations (also linked above): the code here heavily derives from the wonderful work of previous projects. We seek to extract out, unify, and maintain key parts of these projects.
- sklearn-expertsys - by @tmadl and @kenben based on original code by Ben Letham
- rulefit - by @christophM
- skope-rules - by the skope-rules team (including @ngoix, @floriangardin, @datajms, Bibi Ndiaye, Ronan Gautier)
diff --git a/docs/rule_list/bayesian_rule_list/bayesian_rule_list.html b/docs/rule_list/bayesian_rule_list/bayesian_rule_list.html
index 766b3e8a..76aa5720 100644
--- a/docs/rule_list/bayesian_rule_list/bayesian_rule_list.html
+++ b/docs/rule_list/bayesian_rule_list/bayesian_rule_list.html
@@ -178,21 +178,22 @@ Module imodels.rule_list.bayesian_rule_list.bayesian_rul
y = y.values
X, y = self._setdata(X, y, feature_labels, undiscretized_features)
-
permsdic = defaultdict(default_permsdic) # We will store here the MCMC results
-
data = list(X[:])
-
+
# Now find frequent itemsets
- # Mine separately for each class
- data_pos = [x for i, x in enumerate(data) if y[i] == 0]
- data_neg = [x for i, x in enumerate(data) if y[i] == 1]
- assert len(data_pos) + len(data_neg) == len(data)
- X_df = pd.DataFrame(X, columns=feature_labels)
- itemsets_df = fpgrowth(X_df, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+ onehot_features = X_df_onehot.columns
+
+ itemsets_df = fpgrowth(X_df_onehot, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
itemsets_indices = [tuple(s[1]) for s in itemsets_df.values]
- itemsets = [np.array(feature_labels)[list(inds)] for inds in itemsets_indices]
+ itemsets = [np.array(onehot_features)[list(inds)] for inds in itemsets_indices]
itemsets = list(map(tuple, itemsets))
if self.verbose:
print(len(itemsets), 'rules mined')
@@ -200,12 +201,13 @@ Module imodels.rule_list.bayesian_rule_list.bayesian_rul
# Now form the data-vs.-lhs set
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [{}] * (len(itemsets) + 1)
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(itemsets):
- X[j + 1] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j + 1] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
+
# now form lhs_len
@@ -290,15 +292,20 @@ Module imodels.rule_list.bayesian_rule_list.bayesian_rul
return "(Untrained RuleListClassifier)"
def _to_itemset_indices(self, data):
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=self.feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- X_df = pd.DataFrame(data, columns=self.feature_labels)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [set() for j in range(len(self.itemsets))]
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(self.itemsets):
if j > 0:
- X[j] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
return X
def predict_proba(self, X):
@@ -322,7 +329,7 @@ Module imodels.rule_list.bayesian_rule_list.bayesian_rul
if self.discretizer:
self.discretizer._data = pd.DataFrame(X, columns=self.feature_labels)
self.discretizer.apply_cutpoints()
- D = self._prepend_feature_labels(np.array(self.discretizer._data)[:, :-1])
+ D = self._prepend_feature_labels(np.array(self.discretizer._data))
else:
D = X
@@ -536,21 +543,22 @@ Parameters
y = y.values
X, y = self._setdata(X, y, feature_labels, undiscretized_features)
-
permsdic = defaultdict(default_permsdic) # We will store here the MCMC results
-
data = list(X[:])
-
+
# Now find frequent itemsets
- # Mine separately for each class
- data_pos = [x for i, x in enumerate(data) if y[i] == 0]
- data_neg = [x for i, x in enumerate(data) if y[i] == 1]
- assert len(data_pos) + len(data_neg) == len(data)
- X_df = pd.DataFrame(X, columns=feature_labels)
- itemsets_df = fpgrowth(X_df, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+ onehot_features = X_df_onehot.columns
+
+ itemsets_df = fpgrowth(X_df_onehot, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
itemsets_indices = [tuple(s[1]) for s in itemsets_df.values]
- itemsets = [np.array(feature_labels)[list(inds)] for inds in itemsets_indices]
+ itemsets = [np.array(onehot_features)[list(inds)] for inds in itemsets_indices]
itemsets = list(map(tuple, itemsets))
if self.verbose:
print(len(itemsets), 'rules mined')
@@ -558,12 +566,13 @@ Parameters
# Now form the data-vs.-lhs set
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [{}] * (len(itemsets) + 1)
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(itemsets):
- X[j + 1] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j + 1] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
+
# now form lhs_len
@@ -648,15 +657,20 @@ Parameters
return "(Untrained RuleListClassifier)"
def _to_itemset_indices(self, data):
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=self.feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- X_df = pd.DataFrame(data, columns=self.feature_labels)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [set() for j in range(len(self.itemsets))]
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(self.itemsets):
if j > 0:
- X[j] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
return X
def predict_proba(self, X):
@@ -680,7 +694,7 @@ Parameters
if self.discretizer:
self.discretizer._data = pd.DataFrame(X, columns=self.feature_labels)
self.discretizer.apply_cutpoints()
- D = self._prepend_feature_labels(np.array(self.discretizer._data)[:, :-1])
+ D = self._prepend_feature_labels(np.array(self.discretizer._data))
else:
D = X
@@ -808,21 +822,22 @@ Returns
y = y.values
X, y = self._setdata(X, y, feature_labels, undiscretized_features)
-
permsdic = defaultdict(default_permsdic) # We will store here the MCMC results
-
data = list(X[:])
-
+
# Now find frequent itemsets
- # Mine separately for each class
- data_pos = [x for i, x in enumerate(data) if y[i] == 0]
- data_neg = [x for i, x in enumerate(data) if y[i] == 1]
- assert len(data_pos) + len(data_neg) == len(data)
- X_df = pd.DataFrame(X, columns=feature_labels)
- itemsets_df = fpgrowth(X_df, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
+ X_colname_removed = data.copy()
+ for i in range(len(data)):
+ X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+
+ X_df_categorical = pd.DataFrame(X_colname_removed, columns=feature_labels)
+ X_df_onehot = pd.get_dummies(X_df_categorical)
+ onehot_features = X_df_onehot.columns
+
+ itemsets_df = fpgrowth(X_df_onehot, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
itemsets_indices = [tuple(s[1]) for s in itemsets_df.values]
- itemsets = [np.array(feature_labels)[list(inds)] for inds in itemsets_indices]
+ itemsets = [np.array(onehot_features)[list(inds)] for inds in itemsets_indices]
itemsets = list(map(tuple, itemsets))
if self.verbose:
print(len(itemsets), 'rules mined')
@@ -830,12 +845,13 @@ Returns
# Now form the data-vs.-lhs set
# X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
- for c in X_df.columns:
- X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+ for c in X_df_onehot.columns:
+ X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
X = [{}] * (len(itemsets) + 1)
X[0] = set(range(len(data))) # the default rule satisfies all data
for (j, lhs) in enumerate(itemsets):
- X[j + 1] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+ X[j + 1] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
+
# now form lhs_len
@@ -953,7 +969,7 @@ Returns
if self.discretizer:
self.discretizer._data = pd.DataFrame(X, columns=self.feature_labels)
self.discretizer.apply_cutpoints()
- D = self._prepend_feature_labels(np.array(self.discretizer._data)[:, :-1])
+ D = self._prepend_feature_labels(np.array(self.discretizer._data))
else:
D = X
diff --git a/notebooks/1_model_based.ipynb b/notebooks/1_model_based.ipynb
index b6b7ab4f..d20bd1cb 100644
--- a/notebooks/1_model_based.ipynb
+++ b/notebooks/1_model_based.ipynb
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 7,
"metadata": {
"pycharm": {
"is_executing": false
@@ -271,28 +271,32 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
- "name": "stdout",
+ "name": "stderr",
"output_type": "stream",
"text": [
- "training...\n"
+ "/accounts/projects/vision/.local/lib/python3.7/site-packages/sklearn/datasets/_openml.py:376: UserWarning: Multiple active versions of the dataset matching the name diabetes exist. Versions may be fundamentally different, returning version 1.\n",
+ " \" {version}.\".format(name=name, version=res[0]['version']))\n"
]
},
{
- "ename": "ValueError",
- "evalue": "The allowed values for a DataFrame are True, False, 0, 1. Found value #Pregnant : 6.5_to_inf",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'training...'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 13\u001b[0m \u001b[0mmodel\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBayesianRuleListClassifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmax_iter\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclass1label\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"diabetes\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 14\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfeature_labels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeature_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 15\u001b[0m \u001b[0mpreds\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"RuleListClassifier Accuracy:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0my_test\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mpreds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Learned interpretable model:\\n\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/chandan/imodels/imodels/rule_list/bayesian_rule_list/bayesian_rule_list.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, y, feature_labels, undiscretized_features, verbose)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0mX_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfeature_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 167\u001b[0;31m \u001b[0mitemsets_df\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfpgrowth\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_df\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmin_support\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mminsupport\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_len\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmaxcardinality\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 168\u001b[0m \u001b[0mitemsets_indices\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mtuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0ms\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mitemsets_df\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[0mitemsets\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m[\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfeature_labels\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0minds\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mitemsets_indices\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/.local/lib/python3.7/site-packages/mlxtend/frequent_patterns/fpgrowth.py\u001b[0m in \u001b[0;36mfpgrowth\u001b[0;34m(df, min_support, use_colnames, max_len, verbose)\u001b[0m\n\u001b[1;32m 71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \"\"\"\n\u001b[0;32m---> 73\u001b[0;31m \u001b[0mfpc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalid_input_check\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 74\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 75\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmin_support\u001b[0m \u001b[0;34m<=\u001b[0m \u001b[0;36m0.\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/.local/lib/python3.7/site-packages/mlxtend/frequent_patterns/fpcommon.py\u001b[0m in \u001b[0;36mvalid_input_check\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 114\u001b[0m s = ('The allowed values for a DataFrame'\n\u001b[1;32m 115\u001b[0m ' are True, False, 0, 1. Found value %s' % (val))\n\u001b[0;32m--> 116\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 117\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 118\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: The allowed values for a DataFrame are True, False, 0, 1. Found value #Pregnant : 6.5_to_inf"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "training...\n",
+ "RuleListClassifier Accuracy: 0.671875 Learned interpretable model:\n",
+ " Trained RuleListClassifier for detecting diabetes\n",
+ "==================================================\n",
+ "IF #Pregnant_-inf_to_6.5 AND Glucose concentration test_-inf_to_122.5 THEN probability of diabetes: 9.9% (4.9%-16.4%)\n",
+ "ELSE IF Body mass index_30.9_to_inf THEN probability of diabetes: 69.7% (58.2%-80.1%)\n",
+ "ELSE IF Triceps skin fold thickness(mm)_All THEN probability of diabetes: 38.7% (22.7%-56.1%)\n",
+ "ELSE IF Age (years)_26.5_to_inf THEN probability of diabetes: 50.0% (2.5%-97.5%)\n",
+ "ELSE probability of diabetes: 50.0% (2.5%-97.5%)\n",
+ "=================================================\n",
+ "\n"
]
}
],
diff --git a/readme.md b/readme.md
index 682f7953..2c684365 100644
--- a/readme.md
+++ b/readme.md
@@ -30,7 +30,7 @@ preds = model.predict(X_test) # discrete predictions: shape is (n_test, 1)
preds_proba = model.predict_proba(X_test) # predicted probabilities: shape is (n_test, n_classes)
```
-Install with `pip install git+https://github.com/csinva/imodels` (see [here](https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md) for help). Contains the following models:
+Install with `pip install imodels` (see [here](https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md) for help). Contains the following models:
| Model | Reference | Description |
| :--------------------------- | ------------------------------------------------------------ | ------------------------------------------------------------ |
@@ -70,7 +70,7 @@ Demos are contained in the [notebooks](notebooks) folder.
- Interpretable ML book: molnar 2019, [pdf](https://christophm.github.io/interpretable-ml-book/)
- Case for interpretable models rather than post-hoc explanation: rudin 2019, [pdf](https://arxiv.org/pdf/1811.10154.pdf)
- Review on evaluating interpretability: doshi-velez & kim 2017, [pdf](https://arxiv.org/pdf/1702.08608.pdf)
-- Reference implementations (also linked above): the code here heavily derives from (and in some case is just a wrapper for) the wonderful work of previous projects. We seek to to extract out, combine, and maintain select relevant parts of these projects.
+- Reference implementations (also linked above): the code here heavily derives from the wonderful work of previous projects. We seek to extract out, unify, and maintain key parts of these projects.
- [sklearn-expertsys](https://github.com/tmadl/sklearn-expertsys) - by [@tmadl](https://github.com/tmadl) and [@kenben](https://github.com/kenben) based on original code by [Ben Letham](http://lethalletham.com/)
- [rulefit](https://github.com/christophM/rulefit) - by [@christophM](https://github.com/christophM)
- [skope-rules](https://github.com/scikit-learn-contrib/skope-rules) - by the [skope-rules team](https://github.com/scikit-learn-contrib/skope-rules/blob/master/AUTHORS.rst) (including [@ngoix](https://github.com/ngoix), [@floriangardin](https://github.com/floriangardin), [@datajms](https://github.com/datajms), [Bibi Ndiaye](), [Ronan Gautier]())
diff --git a/setup.py b/setup.py
index 0d1fd457..ae78dcee 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,7 @@
setuptools.setup(
name="imodels",
- version="0.2.3",
+ version="0.2.4",
author="Chandan Singh",
author_email="chandan_singh@berkeley.edu",
description="Implementations of various interpretable models",