MOABB release version 0.2.0 (#51)

Compatible with mne 0.16, seaborn 0.9. Schirrmeister2017 high gamma dataset added to motor imagery. Basic P300 paradigm and datasets (EPFL, Brain Invaders) added. Basic SSVEP paradigm, datasets, and pipelines added. Doc fixes.
NeuroTechX · Feb 1, 2019 · 8dd6bfb · 8dd6bfb
1 parent cf541ac
commit 8dd6bfb
Show file tree

Hide file tree

Showing 36 changed files with 1,510 additions and 96 deletions.
diff --git a/.gitignore b/.gitignore
@@ -112,3 +112,6 @@ analysis/*
 /moabb/tests/orderplot.pdf
 /moabb/tests/orderplot_full.pdf
 /test_context.yml
+
+# mac os x stuff
+*DS_store*
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # Mother of all BCI Benchmark
-<div align="center">
-  <img src="/images/M.png">
-</div>
+<p align="center">
+  <img alt="banner" src="/images/M.png">
+</p>
 <p align="center" href="">
   Build a comprehensive benchmark of popular BCI algorithms applied on an extensive list of freely available EEG datasets.
 </p>
@@ -32,23 +32,23 @@ This document (the README file) is a hub to give you some information about the
 * [Documentation](#docs)
 * [Architecture and main concepts](#architecture)
 
-We also have a recent [arXiv preprint][link_arxiv]. 
+We also have a recent [paper][link_jne] in JNE.
 
 ## What are we doing?
 
 ### The problem
 
-* Reproducible Research in BCI has a long way to go. 
-* While many BCI datasets are made freely available, researchers do not publish code, and reproducing results required to benchmark new algorithms turns out to be more tricky than it should be. 
-* Performances can be significantly impacted by parameters of the preprocessing steps, toolboxes used and implementation “tricks” that are almost never reported in the literature. 
+* Reproducible Research in BCI has a long way to go.
+* While many BCI datasets are made freely available, researchers do not publish code, and reproducing results required to benchmark new algorithms turns out to be more tricky than it should be.
+* Performances can be significantly impacted by parameters of the preprocessing steps, toolboxes used and implementation “tricks” that are almost never reported in the literature.
 
 As a results, there is no comprehensive benchmark of BCI algorithm, and newcomers are spending a tremendous amount of time browsing literature to find out what algorithm works best and on which dataset.
 
 ### The solution
 
 The Mother of all BCI Benchmark will:
 
-* Build a comprehensive benchmark of popular BCI algorithms applied on an extensive list of freely available EEG datasets. 
+* Build a comprehensive benchmark of popular BCI algorithms applied on an extensive list of freely available EEG datasets.
 * The code will be made available on github, serving as a reference point for the future algorithmic developments.
 * Algorithms can be ranked and promoted on a website, providing a clear picture of the different solutions available in the field.
 
@@ -205,3 +205,4 @@ make html
 [link_neurotechx_signup]: https://docs.google.com/forms/d/e/1FAIpQLSfZyzhVdOLU8_oQ4NylHL8EFoKLIVmryGXA4u7HDsZpkTryvg/viewform
 [link_moabb_docs]: http://moabb.neurotechx.com/docs/index.html
 [link_arxiv]: https://arxiv.org/abs/1805.06427
+[link_jne]: http://iopscience.iop.org/article/10.1088/1741-2552/aadea0/meta
diff --git a/docs/source/README.md b/docs/source/README.md
@@ -26,7 +26,7 @@ This document (the README file) is a hub to give you some information about the
 * [Documentation](#docs)
 * [Architecture and main concepts](#architecture)
 
-We also have a recent [arXiv preprint][link_arxiv]. 
+We also have a recent [paper][link_jne] in JNE.
 
 ## What are we doing?
 
@@ -198,3 +198,4 @@ make html
 [link_neurotechx_signup]: https://docs.google.com/forms/d/e/1FAIpQLSfZyzhVdOLU8_oQ4NylHL8EFoKLIVmryGXA4u7HDsZpkTryvg/viewform
 [link_moabb_docs]: http://moabb.neurotechx.com/docs/index.html
 [link_arxiv]: https://arxiv.org/abs/1805.06427
+[link_jne]: http://iopscience.iop.org/article/10.1088/1741-2552/aadea0/meta
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -67,9 +67,9 @@
 plot_html_show_source_link = False
 
 sphinx_gallery_conf = {
-   'examples_dirs': ['../../examples', '../../tutorials'],
-   'gallery_dirs': ['auto_examples', 'auto_tutorials'],
-   'backreferences_dir': False}
+    'examples_dirs': ['../../examples', '../../tutorials'],
+    'gallery_dirs': ['auto_examples', 'auto_tutorials'],
+    'backreferences_dir': False}
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']

diff --git a/docs/source/datasets.rst b/docs/source/datasets.rst
@@ -24,12 +24,14 @@ Motor Imagery Datasets
     MunichMI
     Ofner2017
     PhysionetMI
+    Schirrmeister2017
     Shin2017A
     Shin2017B
     Weibo2014
     Zhou2016
     SSVEPExo
 
+
 ------------
 ERP Datasets
 ------------
@@ -38,6 +40,12 @@ ERP Datasets
     :toctree: generated/
     :template: class.rst
 
+    bi2013a
+    BNCI2014008
+    BNCI2014009
+    BNCI2015003
+    EPFLP300
+
 --------------
 SSVEP Datasets
 --------------
@@ -46,6 +54,8 @@ SSVEP Datasets
     :toctree: generated/
     :template: class.rst
 
+    SSVEPExo
+
 ------------
 Base & Utils
 ------------

diff --git a/examples/plot_cross_session_motor_imagery.py b/examples/plot_cross_session_motor_imagery.py
@@ -61,7 +61,7 @@
 
 pipelines['RG + LR'] = make_pipeline(Covariances(),
                                      TangentSpace(),
-                                     LogisticRegression())
+                                     LogisticRegression(solver='lbfgs'))
 
 ##############################################################################
 # Evaluation
@@ -76,7 +76,10 @@
 # be overwrited if necessary.
 
 paradigm = LeftRightImagery()
-datasets = [BNCI2014001()]
+# Because this is being auto-generated we only use 2 subjects
+dataset = BNCI2014001()
+dataset.subject_list = dataset.subject_list[:2]
+datasets = [dataset]
 overwrite = False  # set to True if we want to overwrite cached results
 evaluation = CrossSessionEvaluation(paradigm=paradigm, datasets=datasets,
                                     suffix='examples', overwrite=overwrite)

diff --git a/examples/plot_filterbank_csp_vs_csp.py b/examples/plot_filterbank_csp_vs_csp.py
@@ -63,7 +63,10 @@
 # The second is a `FilterBankLeftRightImagery` with a bank of 6 filter, ranging
 # from 8 to 35 Hz.
 
-datasets = [BNCI2014001()]
+# Because this is being auto-generated we only use 2 subjects
+dataset = BNCI2014001()
+dataset.subject_list = dataset.subject_list[:2]
+datasets = [dataset]
 overwrite = False  # set to True if we want to overwrite cached results
 
 # broadband filters

diff --git a/examples/plot_within_session_p300.py b/examples/plot_within_session_p300.py
@@ -0,0 +1,126 @@
+"""
+===========================
+Within Session P300
+===========================
+
+This example shows how to perform a within session analysis on the EPFL P300
+dataset.
+
+We will compare two pipelines :
+
+- Riemannian Geometry
+- xDawn with Linear Discriminant Analysis
+
+We will use the P300 paradigm, which uses the AUC as metric.
+
+"""
+# Authors: Pedro Rodrigues <[email protected]>
+#
+# License: BSD (3-clause)
+
+# FutureWarning and RuntimeWarning are silenced below, right after the imports
+from sklearn.pipeline import make_pipeline
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
+from sklearn.base import BaseEstimator, TransformerMixin
+from pyriemann.tangentspace import TangentSpace
+from pyriemann.estimation import XdawnCovariances, Xdawn
+from moabb.evaluations import WithinSessionEvaluation
+from moabb.paradigms import P300
+from moabb.datasets import EPFLP300
+import moabb
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import warnings
+warnings.simplefilter(action='ignore', category=FutureWarning)
+warnings.simplefilter(action='ignore', category=RuntimeWarning)
+
+
+moabb.set_log_level('info')
+
+# This is an auxiliary transformer that allows one to vectorize data
+# structures in a pipeline. For instance, in the case of an X with dimensions
+# Nt x Nc x Ns, one might be interested in a new data structure with
+# dimensions Nt x (Nc.Ns)
+
+
+class Vectorizer(BaseEstimator, TransformerMixin):
+
+    def __init__(self):
+        pass
+
+    def fit(self, X, y):
+        """No fitting is needed; return self unchanged."""
+        return self
+
+    def transform(self, X):
+        """Flatten each sample: reshape X to (X.shape[0], -1)."""
+        return np.reshape(X, (X.shape[0], -1))
+
+##############################################################################
+# Create pipelines
+# ----------------
+#
+# Pipelines must be a dict of sklearn pipeline transformers.
+
+
+pipelines = {}
+
+# we have to do this because the classes are called 'Target' and 'NonTarget'
+# but the evaluation function uses a LabelEncoder, transforming them
+# to 0 and 1
+labels_dict = {'Target': 1, 'NonTarget': 0}
+
+pipelines['RG + LDA'] = make_pipeline(
+    XdawnCovariances(
+        nfilter=2,
+        classes=[
+            labels_dict['Target']],
+        estimator='lwf',
+        xdawn_estimator='lwf'),
+    TangentSpace(),
+    LDA(solver='lsqr', shrinkage='auto'))
+
+pipelines['Xdw + LDA'] = make_pipeline(Xdawn(nfilter=2, estimator='lwf'),
+                                       Vectorizer(), LDA(solver='lsqr',
+                                                         shrinkage='auto'))
+
+##############################################################################
+# Evaluation
+# ----------
+#
+# We define the paradigm (P300) and use the first two subjects of EPFLP300.
+# The evaluation will return a dataframe containing a single AUC score for
+# each subject / session of the dataset, and for each pipeline.
+#
+# Results are saved into the database, so that if you add a new pipeline, it
+# will not run again the evaluation unless a parameter has changed. Results can
+# be overwritten if necessary.
+
+paradigm = P300(resample=128)
+dataset = EPFLP300()
+dataset.subject_list = dataset.subject_list[:2]
+datasets = [dataset]
+overwrite = True  # set to True if we want to overwrite cached results
+evaluation = WithinSessionEvaluation(paradigm=paradigm,
+                                     datasets=datasets,
+                                     suffix='examples', overwrite=overwrite)
+results = evaluation.process(pipelines)
+
+##############################################################################
+# Plot Results
+# ----------------
+#
+# Here we plot the results.
+
+fig, ax = plt.subplots(facecolor='white', figsize=[8, 4])
+
+sns.stripplot(data=results, y='score', x='pipeline', ax=ax, jitter=True,
+              alpha=.5, zorder=1, palette="Set1")
+sns.pointplot(data=results, y='score', x='pipeline', ax=ax,
+              zorder=1, palette="Set1")
+
+ax.set_ylabel('ROC AUC')
+ax.set_ylim(0.5, 1)
+
+fig.show()
diff --git a/moabb/analysis/__init__.py b/moabb/analysis/__init__.py
@@ -1,9 +1,13 @@
 import os
+import logging
 import platform
 from datetime import datetime
-from moabb.analysis import plotting as plt  # noqa: E501
-from moabb.analysis.results import Results  # noqa: E501,F401
-from moabb.analysis.meta_analysis import find_significant_differences, compute_dataset_statistics  # noqa: E501
+from moabb.analysis import plotting as plt
+from moabb.analysis.results import Results  # noqa: F401
+from moabb.analysis.meta_analysis import (
+    find_significant_differences, compute_dataset_statistics)  # noqa: E501
+
+log = logging.getLogger()
 
 
 def analyze(results, out_path, name='analysis', plot=False):
@@ -27,20 +31,27 @@ def analyze(results, out_path, name='analysis', plot=False):
 
     '''
     # input checks #
-    if type(out_path) is not str:
+    if not isinstance(out_path, str):
         raise ValueError('Given out_path argument is not string')
     elif not os.path.isdir(out_path):
         raise IOError('Given directory does not exist')
     else:
         analysis_path = os.path.join(out_path, name)
 
+    unique_ids = [plt._simplify_names(x) for x in results.pipeline.unique()]
+    simplify = True
+    print(unique_ids)
+    print(set(unique_ids))
+    if len(unique_ids) != len(set(unique_ids)):
+        log.warning(
+            'Pipeline names are too similar, turning off name shortening')
+        simplify = False
+
     os.makedirs(analysis_path, exist_ok=True)
     # TODO: no good cross-platform way of recording CPU info?
     with open(os.path.join(analysis_path, 'info.txt'), 'a') as f:
         dt = datetime.now()
-        f.write(
-            'Date: {:%Y-%m-%d}\n Time: {:%H:%M}\n'.format(dt,
-                                                          dt))
+        f.write('Date: {:%Y-%m-%d}\n Time: {:%H:%M}\n'.format(dt, dt))
         f.write('System: {}\n'.format(platform.system()))
         f.write('CPU: {}\n'.format(platform.processor()))
 
@@ -52,5 +63,5 @@ def analyze(results, out_path, name='analysis', plot=False):
     if plot:
         fig, color_dict = plt.score_plot(results)
         fig.savefig(os.path.join(analysis_path, 'scores.pdf'))
-        fig = plt.summary_plot(P, T)
+        fig = plt.summary_plot(P, T, simplify=simplify)
         fig.savefig(os.path.join(analysis_path, 'ordering.pdf'))
diff --git a/moabb/analysis/plotting.py b/moabb/analysis/plotting.py
@@ -11,7 +11,8 @@
 
 
 PIPELINE_PALETTE = sea.color_palette("husl", 6)
-sea.set(font='serif', style='whitegrid', palette=PIPELINE_PALETTE)
+sea.set(font='serif', style='whitegrid',
+        palette=PIPELINE_PALETTE, color_codes=False)
 
 log = logging.getLogger()
 
@@ -72,15 +73,16 @@ def paired_plot(data, alg1, alg2):
     return fig
 
 
-def summary_plot(sig_df, effect_df, p_threshold=0.05):
+def summary_plot(sig_df, effect_df, p_threshold=0.05, simplify=True):
     '''Visualize significances as a heatmap with green/grey/red for significantly
     higher/significantly lower.
     sig_df is a DataFrame of pipeline x pipeline where each value is a p-value,
     effect_df is a DF where each value is an effect size
 
     '''
-    effect_df.columns = effect_df.columns.map(_simplify_names)
-    sig_df.columns = sig_df.columns.map(_simplify_names)
+    if simplify:
+        effect_df.columns = effect_df.columns.map(_simplify_names)
+        sig_df.columns = sig_df.columns.map(_simplify_names)
     annot_df = effect_df.copy()
     for row in annot_df.index:
         for col in annot_df.columns:

diff --git a/moabb/analysis/results.py b/moabb/analysis/results.py
@@ -54,7 +54,7 @@ class that will abstract result storage
         self.filepath = os.path.join(self.mod_dir, 'results',
                                      paradigm_class.__name__,
                                      evaluation_class.__name__,
-                                     'results{}.hdf5'.format('_'+suffix))
+                                     'results{}.hdf5'.format('_' + suffix))
 
         os.makedirs(os.path.dirname(self.filepath), exist_ok=True)
         self.filepath = self.filepath

diff --git a/moabb/datasets/Weibo2014.py b/moabb/datasets/Weibo2014.py
@@ -29,7 +29,7 @@ def eeg_data_path(base_path, subject):
 
     def get_subjects(sub_inds, sub_names, ind):
         dataname = 'data{}'.format(ind)
-        if not os.path.isfile(os.path.join(base_path, dataname+'.zip')):
+        if not os.path.isfile(os.path.join(base_path, dataname + '.zip')):
             _fetch_file(FILES[ind], os.path.join(
                 base_path, dataname + '.zip'), print_destination=False)
         with z.ZipFile(os.path.join(base_path, dataname + '.zip'), 'r') as f:

diff --git a/moabb/datasets/__init__.py b/moabb/datasets/__init__.py
@@ -8,9 +8,12 @@
 from .gigadb import Cho2017
 from .alex_mi import AlexMI
 from .physionet_mi import PhysionetMI
-from .bnci import (BNCI2014001, BNCI2014002, BNCI2014004, BNCI2015001,
-                   BNCI2015004)
+from .bnci import (BNCI2014001, BNCI2014002, BNCI2014004, BNCI2014008,
+                   BNCI2014009, BNCI2015001, BNCI2015003, BNCI2015004)
 from .bbci_eeg_fnirs import Shin2017A, Shin2017B
+from .schirrmeister2017 import Schirrmeister2017
+from .epfl import EPFLP300
+from .braininvaders import bi2013a
 from .upper_limb import Ofner2017
 from .Weibo2014 import Weibo2014
 from .Zhou2016 import Zhou2016