
Commit

bump version
csinva committed Nov 15, 2020
1 parent 87b75b4 commit a68e2e5
Showing 5 changed files with 90 additions and 70 deletions.
4 changes: 2 additions & 2 deletions docs/index.html
@@ -46,7 +46,7 @@ <h2 id="popular-interpretable-models">Popular interpretable models</h2>
 preds = model.predict(X_test)  # discrete predictions: shape is (n_test, 1)
 preds_proba = model.predict_proba(X_test)  # predicted probabilities: shape is (n_test, n_classes)
 </code></pre>
-<p>Install with <code>pip install git+https://github.com/csinva/imodels</code> (see <a href="https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md">here</a> for help). Contains the following models:</p>
+<p>Install with <code>pip install imodels</code> (see <a href="https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md">here</a> for help). Contains the following models:</p>
 <table>
 <thead>
 <tr>
@@ -139,7 +139,7 @@ <h2 id="references">References</h2>
 <li>Review on evaluating interpretability: doshi-velez & kim 2017, <a href="https://arxiv.org/pdf/1702.08608.pdf">pdf</a></li>
 </ul>
 </li>
-<li>Reference implementations (also linked above): the code here heavily derives from (and in some cases is just a wrapper for) the wonderful work of previous projects. We seek to extract out, combine, and maintain select relevant parts of these projects.<ul>
+<li>Reference implementations (also linked above): the code here heavily derives from the wonderful work of previous projects. We seek to extract out, unify, and maintain key parts of these projects.<ul>
 <li><a href="https://github.com/tmadl/sklearn-expertsys">sklearn-expertsys</a> - by <a href="https://github.com/tmadl">@tmadl</a> and <a href="https://github.com/kenben">@kenben</a> based on original code by <a href="http://lethalletham.com/">Ben Letham</a></li>
 <li><a href="https://github.com/christophM/rulefit">rulefit</a> - by <a href="https://github.com/christophM">@christophM</a></li>
 <li><a href="https://github.com/scikit-learn-contrib/skope-rules">skope-rules</a> - by the <a href="https://github.com/scikit-learn-contrib/skope-rules/blob/master/AUTHORS.rst">skope-rules team</a> (including <a href="https://github.com/ngoix">@ngoix</a>, <a href="https://github.com/floriangardin">@floriangardin</a>, <a href="https://github.com/datajms">@datajms</a>, <a href="">Bibi Ndiaye</a>, <a href="">Ronan Gautier</a>)</li>
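
The install hunk above switches the docs from a git-based install to the published PyPI package. For orientation, here is a minimal sketch of the fit/predict pattern the page documents; the dataset and the stand-in estimator below are illustrative assumptions, not part of this commit.

# Minimal sketch of the documented scikit-learn-style API. DecisionTreeClassifier
# stands in for any imodels estimator (an assumption for illustration only).
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

model = DecisionTreeClassifier(max_depth=3)
model.fit(X_train, y_train)
preds = model.predict(X_test)              # discrete predictions: shape (n_test,)
preds_proba = model.predict_proba(X_test)  # probabilities: shape (n_test, n_classes)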
116 changes: 66 additions & 50 deletions docs/rule_list/bayesian_rule_list/bayesian_rule_list.html
@@ -178,34 +178,36 @@ <h1 class="title">Module <code>imodels.rule_list.bayesian_rule_list.bayesian_rul
         y = y.values

         X, y = self._setdata(X, y, feature_labels, undiscretized_features)

         permsdic = defaultdict(default_permsdic)  # We will store here the MCMC results

         data = list(X[:])

         # Now find frequent itemsets
         # Mine separately for each class
         data_pos = [x for i, x in enumerate(data) if y[i] == 0]
         data_neg = [x for i, x in enumerate(data) if y[i] == 1]
         assert len(data_pos) + len(data_neg) == len(data)

-        X_df = pd.DataFrame(X, columns=feature_labels)
-        itemsets_df = fpgrowth(X_df, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
+        X_colname_removed = data.copy()
+        for i in range(len(data)):
+            X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+
+        X_df_categorical = pd.DataFrame(X_colname_removed, columns=feature_labels)
+        X_df_onehot = pd.get_dummies(X_df_categorical)
+        onehot_features = X_df_onehot.columns
+
+        itemsets_df = fpgrowth(X_df_onehot, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
         itemsets_indices = [tuple(s[1]) for s in itemsets_df.values]
-        itemsets = [np.array(feature_labels)[list(inds)] for inds in itemsets_indices]
+        itemsets = [np.array(onehot_features)[list(inds)] for inds in itemsets_indices]
         itemsets = list(map(tuple, itemsets))
         if self.verbose:
             print(len(itemsets), 'rules mined')

         # Now form the data-vs.-lhs set
         # X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
-        for c in X_df.columns:
-            X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+        for c in X_df_onehot.columns:
+            X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
         X = [{}] * (len(itemsets) + 1)
         X[0] = set(range(len(data)))  # the default rule satisfies all data
         for (j, lhs) in enumerate(itemsets):
-            X[j + 1] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+            X[j + 1] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])

         # now form lhs_len
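
This hunk changes the mining step: instead of running FP-growth directly on the raw "feature : value" string matrix, the code now strips the feature-name prefix, one-hot encodes the values with pd.get_dummies, and mines the resulting indicator matrix, so mined itemset indices refer to one-hot columns. A toy sketch of that pattern, with invented data (mlxtend supplies fpgrowth, as in the diff):

import pandas as pd
from mlxtend.frequent_patterns import fpgrowth

# Toy stand-in for the discretized data: each cell is a 'feature : value' string
data = [['age : <30', 'income : high'],
        ['age : <30', 'income : low'],
        ['age : 30+', 'income : high']]
feature_labels = ['age', 'income']

# Keep only the value part, then one-hot encode the categorical columns
values = [[s.split(' : ')[1] for s in row] for row in data]
X_df_onehot = pd.get_dummies(pd.DataFrame(values, columns=feature_labels)).astype(bool)

# Mine frequent itemsets; fpgrowth returns frozensets of column positions,
# which map back to one-hot column names exactly as the new code does
itemsets_df = fpgrowth(X_df_onehot, min_support=0.5, max_len=2)
itemsets = [tuple(X_df_onehot.columns[list(inds)]) for inds in itemsets_df['itemsets']]
print(itemsets)  # e.g. [('age_<30',), ('income_high',)]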
@@ -290,15 +292,20 @@ <h1 class="title">Module <code>imodels.rule_list.bayesian_rule_list.bayesian_rul
         return "(Untrained RuleListClassifier)"

     def _to_itemset_indices(self, data):
+        X_colname_removed = data.copy()
+        for i in range(len(data)):
+            X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+        X_df_categorical = pd.DataFrame(X_colname_removed, columns=self.feature_labels)
+        X_df_onehot = pd.get_dummies(X_df_categorical)
+
         # X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
-        X_df = pd.DataFrame(data, columns=self.feature_labels)
-        for c in X_df.columns:
-            X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+        for c in X_df_onehot.columns:
+            X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
         X = [set() for j in range(len(self.itemsets))]
         X[0] = set(range(len(data)))  # the default rule satisfies all data
         for (j, lhs) in enumerate(self.itemsets):
             if j > 0:
-                X[j] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+                X[j] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
         return X

     def predict_proba(self, X):
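
In _to_itemset_indices, the same one-hot frame is then used to find which rows satisfy each rule: every 1 in the indicator matrix is replaced by its column name, so a row becomes the set of conditions it meets, and a rule fires exactly when its left-hand side is a subset of that set. A self-contained toy illustration of the trick (data invented for the example):

import pandas as pd

X_df_onehot = pd.DataFrame({'age_<30': [1, 1, 0], 'income_high': [1, 0, 1]})
itemsets = [('age_<30',), ('age_<30', 'income_high')]

# Replace each 1 with its column name: a row becomes the conditions it satisfies
for c in X_df_onehot.columns:
    X_df_onehot[c] = [c if x == 1 else '' for x in X_df_onehot[c]]

X = [set(range(len(X_df_onehot)))]  # the default rule matches every row
for lhs in itemsets:
    X.append({i for i, xi in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)})
print(X)  # [{0, 1, 2}, {0, 1}, {0}]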
@@ -322,7 +329,7 @@ <h1 class="title">Module <code>imodels.rule_list.bayesian_rule_list.bayesian_rul
         if self.discretizer:
             self.discretizer._data = pd.DataFrame(X, columns=self.feature_labels)
             self.discretizer.apply_cutpoints()
-            D = self._prepend_feature_labels(np.array(self.discretizer._data)[:, :-1])
+            D = self._prepend_feature_labels(np.array(self.discretizer._data))
         else:
             D = X
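
The predict_proba fix stops slicing off the last column of the discretized data before prepending feature labels. _prepend_feature_labels itself is not shown in this diff; judging from the s.split(' : ')[1] parsing in fit, it plausibly turns each cell into a "feature : value" string, roughly like this hypothetical sketch:

import numpy as np

def prepend_feature_labels(D, feature_labels):
    # Hypothetical reconstruction, not the library's actual helper
    return [[f'{lab} : {val}' for lab, val in zip(feature_labels, row)] for row in D]

D = np.array([['<30', 'high'], ['30+', 'low']])
print(prepend_feature_labels(D, ['age', 'income']))
# [['age : <30', 'income : high'], ['age : 30+', 'income : low']]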

@@ -536,34 +543,36 @@ <h2 id="parameters">Parameters</h2>
         y = y.values

         X, y = self._setdata(X, y, feature_labels, undiscretized_features)

         permsdic = defaultdict(default_permsdic)  # We will store here the MCMC results

         data = list(X[:])

         # Now find frequent itemsets
         # Mine separately for each class
         data_pos = [x for i, x in enumerate(data) if y[i] == 0]
         data_neg = [x for i, x in enumerate(data) if y[i] == 1]
         assert len(data_pos) + len(data_neg) == len(data)

-        X_df = pd.DataFrame(X, columns=feature_labels)
-        itemsets_df = fpgrowth(X_df, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
+        X_colname_removed = data.copy()
+        for i in range(len(data)):
+            X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+
+        X_df_categorical = pd.DataFrame(X_colname_removed, columns=feature_labels)
+        X_df_onehot = pd.get_dummies(X_df_categorical)
+        onehot_features = X_df_onehot.columns
+
+        itemsets_df = fpgrowth(X_df_onehot, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
         itemsets_indices = [tuple(s[1]) for s in itemsets_df.values]
-        itemsets = [np.array(feature_labels)[list(inds)] for inds in itemsets_indices]
+        itemsets = [np.array(onehot_features)[list(inds)] for inds in itemsets_indices]
         itemsets = list(map(tuple, itemsets))
         if self.verbose:
             print(len(itemsets), 'rules mined')

         # Now form the data-vs.-lhs set
         # X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
-        for c in X_df.columns:
-            X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+        for c in X_df_onehot.columns:
+            X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
         X = [{}] * (len(itemsets) + 1)
         X[0] = set(range(len(data)))  # the default rule satisfies all data
         for (j, lhs) in enumerate(itemsets):
-            X[j + 1] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+            X[j + 1] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])

         # now form lhs_len
@@ -648,15 +657,20 @@ <h2 id="parameters">Parameters</h2>
         return "(Untrained RuleListClassifier)"

     def _to_itemset_indices(self, data):
+        X_colname_removed = data.copy()
+        for i in range(len(data)):
+            X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+        X_df_categorical = pd.DataFrame(X_colname_removed, columns=self.feature_labels)
+        X_df_onehot = pd.get_dummies(X_df_categorical)
+
         # X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
-        X_df = pd.DataFrame(data, columns=self.feature_labels)
-        for c in X_df.columns:
-            X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+        for c in X_df_onehot.columns:
+            X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
         X = [set() for j in range(len(self.itemsets))]
         X[0] = set(range(len(data)))  # the default rule satisfies all data
         for (j, lhs) in enumerate(self.itemsets):
             if j > 0:
-                X[j] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+                X[j] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])
         return X

     def predict_proba(self, X):
@@ -680,7 +694,7 @@ <h2 id="parameters">Parameters</h2>
         if self.discretizer:
             self.discretizer._data = pd.DataFrame(X, columns=self.feature_labels)
             self.discretizer.apply_cutpoints()
-            D = self._prepend_feature_labels(np.array(self.discretizer._data)[:, :-1])
+            D = self._prepend_feature_labels(np.array(self.discretizer._data))
         else:
             D = X

@@ -808,34 +822,36 @@ <h2 id="returns">Returns</h2>
         y = y.values

         X, y = self._setdata(X, y, feature_labels, undiscretized_features)

         permsdic = defaultdict(default_permsdic)  # We will store here the MCMC results

         data = list(X[:])

         # Now find frequent itemsets
         # Mine separately for each class
         data_pos = [x for i, x in enumerate(data) if y[i] == 0]
         data_neg = [x for i, x in enumerate(data) if y[i] == 1]
         assert len(data_pos) + len(data_neg) == len(data)

-        X_df = pd.DataFrame(X, columns=feature_labels)
-        itemsets_df = fpgrowth(X_df, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
+        X_colname_removed = data.copy()
+        for i in range(len(data)):
+            X_colname_removed[i] = list(map(lambda s: s.split(' : ')[1], X_colname_removed[i]))
+
+        X_df_categorical = pd.DataFrame(X_colname_removed, columns=feature_labels)
+        X_df_onehot = pd.get_dummies(X_df_categorical)
+        onehot_features = X_df_onehot.columns
+
+        itemsets_df = fpgrowth(X_df_onehot, min_support=(self.minsupport / len(X)), max_len=self.maxcardinality)
         itemsets_indices = [tuple(s[1]) for s in itemsets_df.values]
-        itemsets = [np.array(feature_labels)[list(inds)] for inds in itemsets_indices]
+        itemsets = [np.array(onehot_features)[list(inds)] for inds in itemsets_indices]
         itemsets = list(map(tuple, itemsets))
         if self.verbose:
             print(len(itemsets), 'rules mined')

         # Now form the data-vs.-lhs set
         # X[j] is the set of data points that contain itemset j (that is, satisfy rule j)
-        for c in X_df.columns:
-            X_df[c] = [c if x == 1 else '' for x in list(X_df[c])]
+        for c in X_df_onehot.columns:
+            X_df_onehot[c] = [c if x == 1 else '' for x in list(X_df_onehot[c])]
         X = [{}] * (len(itemsets) + 1)
         X[0] = set(range(len(data)))  # the default rule satisfies all data
         for (j, lhs) in enumerate(itemsets):
-            X[j + 1] = set([i for (i, xi) in enumerate(X_df.values) if set(lhs).issubset(xi)])
+            X[j + 1] = set([i for (i, xi) in enumerate(X_df_onehot.values) if set(lhs).issubset(xi)])

         # now form lhs_len
@@ -953,7 +969,7 @@ <h2 id="returns">Returns</h2>
         if self.discretizer:
             self.discretizer._data = pd.DataFrame(X, columns=self.feature_labels)
             self.discretizer.apply_cutpoints()
-            D = self._prepend_feature_labels(np.array(self.discretizer._data)[:, :-1])
+            D = self._prepend_feature_labels(np.array(self.discretizer._data))
         else:
             D = X

