diff --git a/compile_paper.sh b/compile_paper.sh new file mode 100755 index 00000000..81853164 --- /dev/null +++ b/compile_paper.sh @@ -0,0 +1,5 @@ +docker run --rm \ + --volume $PWD:/data \ + --user $(id -u):$(id -g) \ + --env JOURNAL=joss \ + openjournals/paperdraft \ No newline at end of file diff --git a/paper.md b/paper.md new file mode 100644 index 00000000..7840f4b6 --- /dev/null +++ b/paper.md @@ -0,0 +1,68 @@ +--- +title: 'imodels: a python package for fitting interpretable models' +tags: + - python + - machine learning + - interpretability + - explainability + - transparency + - decision rules +authors: + - name: Chandan Singh^[Equal contribution] + orcid: 0000-0003-0318-2340 + affiliation: 1 # (Multiple affiliations must be quoted) + - name: Keyan Nasseri^[Equal contribution] + affiliation: 1 + - name: Bin Yu + affiliation: "1, 2" +affiliations: + - name: EECS Department, University of California, Berkeley + index: 1 + - name: Statistics Department, University of California, Berkeley + index: 2 +date: 27 January 2021 +bibliography: references.bib +--- + +# Summary + +`imodels` is a Python package for concise, transparent, and accurate predictive modeling. +It provides users a simple interface for fitting and using state-of-the-art interpretable models, all compatible with scikit-learn [@pedregosa2011scikit]. +These models can often replace black-box models while improving interpretability and computational efficiency, all without sacrificing predictive accuracy. +In addition, the package provides a framework for developing custom tools and rule-based models for interpretability. + +# Statement of need + +Recent advancements in machine learning have led to increasingly complex predictive models, often at the cost of interpretability. +There is often a need for models which are inherently interpretable [@rudin2018please; @murdoch2019definitions], particularly in high-stakes applications such as medicine, biology, and political science. 
+In these cases, interpretability can ensure that models behave reasonably, identify when models will make errors, and make the models more trusted by domain experts. +Moreover, interpretable models tend to be much more computationally efficient than larger black-box models. + +Despite the development of many methods for fitting interpretable models [@molnar2020interpretable], implementations for such models are often difficult to find, use, and compare to one another. +`imodels` aims to fill this gap by providing a simple unified interface and implementation for many state-of-the-art interpretable modeling techniques. + +# Features + +Interpretable models can take various forms. +\autoref{fig:models} shows four possible forms a model in the `imodels` package can take. +Each form constrains the final model in order to make it interpretable, but there are different methods for fitting the model which differ in their biases and computational costs. +The `imodels` package contains implementations of various such methods and also useful functions for recombining and extending them. + +Rule sets consist of a set of rules which each act independently. + There are different strategies for deriving a rule set, such as Skope-rules [@skope] or RuleFit [@friedman2008predictive]. +Rule lists are composed of a set of rules which act in sequence, and include models such as Bayesian rule lists [@letham2015interpretable] or the oneR algorithm [@holte1993very]. +Rule trees are similar to rule lists, but allow branching after rules. This includes models such as CART decision trees [@breiman1984classification]. +Algebraic models take a final form of simple algebraic expressions, such as supersparse linear integer models [@ustun2016supersparse]. + +![Examples of different supported model forms. 
The bottom of each box shows predictions of the corresponding model as a function of $X_1$ and $X_2$.\label{fig:models}](./docs/img/model_table.png){ width=100% } + +# Acknowledgements + +The code here heavily derives from the wonderful work of previous projects. +In particular, we build upon the following repos and users: [sklearn-expertsys](https://github.com/tmadl/sklearn-expertsys) - by [Tamas Madl](https://github.com/tmadl) and [Benedict](https://github.com/kenben) based on original code by [Ben Letham](http://lethalletham.com/). +We also based many rule-based models on [skope-rules](https://github.com/scikit-learn-contrib/skope-rules) by the [skope-rules team](https://github.com/scikit-learn-contrib/skope-rules/blob/master/AUTHORS.rst) (including +[Nicolas Goix](https://github.com/ngoix), [Florian Gardin](https://github.com/floriangardin), [Jean-Matthieu Schertzer](https://github.com/datajms), Bibi Ndiaye, and Ronan Gautier). +We also build upon the [rulefit](https://github.com/christophM/rulefit) repository by [Christoph Molnar](https://github.com/christophM). +The authors would also like to acknowledge very useful feedback from Yan Shuo Tan during the early stages of this project. 
+ +# References \ No newline at end of file diff --git a/paper.pdf b/paper.pdf new file mode 100644 index 00000000..55818e20 Binary files /dev/null and b/paper.pdf differ diff --git a/references.bib b/references.bib new file mode 100644 index 00000000..282de3fa --- /dev/null +++ b/references.bib @@ -0,0 +1,96 @@ +@article{ustun2016supersparse, + title={Supersparse linear integer models for optimized medical scoring systems}, + author={Ustun, Berk and Rudin, Cynthia}, + journal={Machine Learning}, + volume={102}, + number={3}, + pages={349--391}, + year={2016}, + publisher={Springer} +} + +@article{pedregosa2011scikit, + title={Scikit-learn: Machine learning in Python}, + author={Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and others}, + journal={Journal of Machine Learning Research}, + volume={12}, + pages={2825--2830}, + year={2011}, + publisher={JMLR. 
org} +} + +@article{letham2015interpretable, + title={Interpretable classifiers using rules and {B}ayesian analysis: Building a better stroke prediction model}, + author={Letham, Benjamin and Rudin, Cynthia and McCormick, Tyler H and Madigan, David}, + journal={Annals of Applied Statistics}, + volume={9}, + number={3}, + pages={1350--1371}, + year={2015}, + publisher={Institute of Mathematical Statistics} +} + +@article{holte1993very, + title={Very simple classification rules perform well on most commonly used datasets}, + author={Holte, Robert C}, + journal={Machine Learning}, + volume={11}, + number={1}, + pages={63--90}, + year={1993}, + publisher={Springer} +} + +@book{breiman1984classification, + title={Classification and regression trees}, + author={Breiman, Leo and Friedman, Jerome and Stone, Charles J and Olshen, Richard A}, + year={1984}, + publisher={CRC Press} +} + +@book{molnar2020interpretable, + title={Interpretable machine learning}, + author={Molnar, Christoph}, + year={2020}, + publisher={Lulu. 
com} +} + +@article{rudin2018please, + title={Please stop explaining black box models for high stakes decisions}, + author={Rudin, Cynthia}, + journal={arXiv preprint arXiv:1811.10154}, + volume={1}, + year={2018}, + publisher={Nov} +} + +@article{murdoch2019definitions, + title={Definitions, methods, and applications in interpretable machine learning}, + author={Murdoch, W James and Singh, Chandan and Kumbier, Karl and Abbasi-Asl, Reza and Yu, Bin}, + journal={Proceedings of the National Academy of Sciences}, + volume={116}, + number={44}, + pages={22071--22080}, + year={2019}, + publisher={National Acad Sciences} +} + +@article{friedman2008predictive, + title={Predictive learning via rule ensembles}, + author={Friedman, Jerome H and Popescu, Bogdan E}, + journal={Annals of Applied Statistics}, + volume={2}, + number={3}, + pages={916--954}, + year={2008}, + publisher={Institute of Mathematical Statistics} +} + +@misc{skope, + author = {{Skope Collaboration}}, + title = {Skope-rules}, + year = {2021}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = {https://github.com/scikit-learn-contrib/skope-rules} +} \ No newline at end of file