Merge pull request #111 from bedapub/Update-Documentation

Update documentation
bedapub · Nov 24, 2020 · 0ae87c3 · 0ae87c3
2 parents 4c78e2a + 7a149f7
commit 0ae87c3
Show file tree

Hide file tree

Showing 120 changed files with 1,496 additions and 669 deletions.
diff --git a/besca/_helper.py b/besca/_helper.py
@@ -178,10 +178,11 @@ def get_raw(adata):
 
 
 def get_ameans(adata,mycat, condition=None):
-    """ Calculates average and fraction expression per category in adata.obs
+    """Calculates average and fraction expression per category in adata.obs
     Based artihmetic mean expression and fraction cells expressing gene per category
     (works on linear scale). Assumes that values in .raw are log: will exponentiate, 
     calculate mean and log back. 
+
     parameters
     ----------
     adata: AnnData
@@ -237,8 +238,8 @@ def formatmean(average_obs, fraction_obs, what, mycond, myg):
 
 
 
-def get_means(adata,mycat, condition=None):
-    """ Calculates average and fraction expression per category in adata.obs
+def get_means(adata, mycat, condition=None):
+    """Calculates average and fraction expression per category in adata.obs
     Based on an AnnData object and an annotation category (e.g. louvain) returns 
     geometric mean expression if .raw values are log as it simply calculates mean
     of whatever values are stored in .raw. Also returns fraction cells expressing gene per category. 
@@ -292,7 +293,7 @@ def get_means(adata,mycat, condition=None):
 
 
 def concate_adata(adata1, adata2):
-    """concatenate two adata objects based on the observations
+    """Concatenate two adata objects based on the observations
 
     this function also merges the objects saved in .raw and generates a new combined.raw.
     The obs from adata1 are preserved. Those from adata2 are lost.

diff --git a/besca/examples/gallery_examples/plotting/plot_celltype_quantification.py b/besca/examples/gallery_examples/plotting/plot_celltype_quantification.py
@@ -1,5 +1,5 @@
 """
-visualize cell fractions
+Visualize cell fractions
 ========================
 
 This example demonstrates how to generate celltype quantification plots. These types of plots 

diff --git a/besca/examples/gallery_examples/plotting/plot_filtering.py b/besca/examples/gallery_examples/plotting/plot_filtering.py
@@ -20,7 +20,7 @@
 max_mito = 0.05
 max_genes = 1900
 
-#visualize filtering thresholds
+#Visualize filtering thresholds
 fig, ((ax1, ax2, ax3), (ax4, ax5, ax6))= plt.subplots(ncols=3, nrows=2)
 fig.set_figwidth(15)
 fig.set_figheight(8)

diff --git a/besca/examples/gallery_examples/plotting/plot_riverplot.py b/besca/examples/gallery_examples/plotting/plot_riverplot.py
@@ -0,0 +1,23 @@
+"""
+Comparing categorical variable
+==============================
+
+This example shows you how to generate riverplots to compare categorical columns, 
+for example to compare multiple annotations.
+This way you can easily spot discrepancies visually.
+
+"""
+
+
+import besca as bc 
+
+#import data
+adata = bc.datasets.Baron2016_processed()
+
+###############################################################################
+# compare two categories: annotations made by different annotators
+# ----------------------------------------------------------------
+
+
+bc.pl.riverplot_2categories(adata,  [ 'assigned_cluster', 'celltype2'])
+
diff --git a/besca/examples/gallery_examples/preprocessing/plot_example_filtering.py b/besca/examples/gallery_examples/preprocessing/plot_example_filtering.py
@@ -8,7 +8,7 @@
 """
 
 import besca as bc
-import scanpy.api as sc
+import scanpy as sc
 import matplotlib.pyplot as plt
 
 #load example dataset
@@ -28,7 +28,7 @@
 #
 # First the chosen thresholds are visualized to ensure that a suitable cutoff has been chosen.
 
-#visualize filtering thresholds
+#Visualize filtering thresholds
 fig, ((ax1, ax2, ax3), (ax4, ax5, ax6))= plt.subplots(ncols=3, nrows=2)
 fig.set_figwidth(15)
 fig.set_figheight(8)

diff --git a/besca/examples/gallery_examples/preprocessing/plot_pca_neighbors_clustering.py b/besca/examples/gallery_examples/preprocessing/plot_pca_neighbors_clustering.py
@@ -7,11 +7,12 @@
 """
 
 import besca as bc
-import scanpy.api as sc
+import scanpy as sc
 
 #import example dataset that has previously been filtered
 adata = bc.datasets.pbmc3k_filtered()
-adata
+## We get the raw matrix containing all the initial genes, keeping the filtering on the cells
+adata = bc.get_raw(adata)
 
 ###############################################################################
 # highly variable gene selection
@@ -71,7 +72,7 @@
 # louvain clustering
 # ------------------
 
-sc.tl.louvain(adata, random_state=random_seed)
+sc.tl.leiden(adata, random_state=random_seed)
 
 ###############################################################################
 # UMAP and t-SNE generation
@@ -87,5 +88,6 @@
 # visualize the results
 # ---------------------
 
-sc.pl.umap(adata, color = ['louvain'])
-sc.pl.tsne(adata, color = ['louvain'])
+sc.pl.umap(adata, color = ['leiden'])
+sc.pl.tsne(adata, color = ['leiden'])
+
diff --git a/besca/examples/gallery_examples/tools/plot_reclustering_function.py b/besca/examples/gallery_examples/tools/plot_reclustering_function.py
@@ -4,38 +4,47 @@
 
 This example demonstrates who to perform a reclustering on a selected subset of
 louvain clusters. You will want to do this for example during the process of celltype
-annotation, when the louvain clusters do not have a sufficient resolution to seperate
+annotation, when the clusters do not have a sufficient resolution to separate
 all clusters and mixed cell populations still exist.
 
 """
 
 import besca as bc
-import scanpy.api as sc
+import scanpy as sc
 
 #load and preprocess data (here we will start from a preprocessed dataset)
 adata = bc.datasets.pbmc3k_processed()
 
 #extract subset using the recluster function whcih is part of the reclustering (rc) toolkit
-adata_subset = bc.tl.rc.recluster(adata, celltype=('0', '1', '3', '6'), celltype_label = 'louvain', resolution = 1.3)
+adata_subset = bc.tl.rc.recluster(adata, celltype=('2', '3', '4', '5', '6','8', '9', '10', '11', '12'), celltype_label = 'leiden', resolution = 1.2)
+
+
+
 
 #visualize the new clusters
-sc.pl.umap(adata_subset, color = ['louvain', 'CD3G', 'CD8A', 'CD4', 'IL7R', 'NKG7', 'GNLY'])
+sc.pl.umap(adata_subset, color = ['leiden', 'CD3G', 'CD8A', 'CD4', 'IL7R', 'NKG7', 'GNLY'])
 
-#append new celltype labels to the subclusters
-new_labels = ["CD4 T-cell", #0
+#append new celltype labels to the subclusters.
+# This is an approximate manual annotation that should be refined in more depth.
+new_labels = ["NK cell", #0
               "CD4 T-cell", #1
-              "CD4 T-cell", #2
-              "CD8 T-cell", #3
-              "NK cell", #4
+              "CD8 T-cell", #2
+              "CD4 T-cell", #3
+              "CD8 T-cell", #4
               "CD8 T-cell", #5
-              "CD8 T-cell",#6
-              "CD4 T-cell", #7
-              "CD4 T-cell", #8
-              "CD4 T-cell", #9
-              "CD4 T-cell"] #10
+              "CD4 T-cell", #6
+              "CD4 T-cell",  #7
+              "CD4 T-cell",  #8
+              "CD4 T-cell",  #9
+              "CD4 T-cell", #10
+              "CD4 T-cell", #11
+              "CD4 T-cell" #12
+              ] #10
 
 #merge the labels back into the original adata object
-#note this will overwrite what ever was saved in adata.obs.celltype
+#note this will overwrite whatever was saved in adata.obs.celltype;
+#here it was not assigned yet.
 bc.tl.rc.annotate_new_cellnames(adata, adata_subset, names=new_labels, new_label = 'celltype')
 
 print(adata.obs.celltype.value_counts())
+
diff --git a/besca/examples/gallery_examples/workflows/plot_celltype_annotation.py b/besca/examples/gallery_examples/workflows/plot_celltype_annotation.py
@@ -1,24 +1,34 @@
 """
-annotate celltypes
+Annotate celltypes
 ==================
 
-An example workflow using the PBMC3k dataset included with besca illustrating how to annotate celltypes based on louvain clusters.
-This workflow begins with a preprocessed and filtered dataset on which a louvain clustering was already performed. 
+An example workflow using the PBMC3k dataset included with besca illustrating how to annotate celltypes based on leiden clusters.
+This workflow begins with a preprocessed and filtered dataset. 
 Please refer to other tutorials on how to perform these steps.
 
+This shows how to display different marker genes, assign the clusters and, if need be, recluster mixed clusters.
+For PBMC datasets we advise the user to work with the sig-annot or auto-annot procedures, which are automated, less error-prone, and
+allow for standardized annotations across datasets.
+
+This is well illustrated in the tutorials (see Notebook 2).
+
 """
+import random
+
 #load libraries
-import besca as bc 
-import scanpy.api as sc
+import besca as bc
+import scanpy as sc
 
+random.seed(1)
 #load preprocessed dataset (included in BESCA for demonstration purposes)
-adata = bc.datasets.pbmc3k_processed()
+adata = bc.datasets.pbmc3k_filtered()
 
 #need to drop celltype annotation stored in this dataset (only relevant for this tutorial)
-adata.obs.drop(columns = ['celltype'], inplace = True)
+adata.obs.drop(columns = ['leiden'], inplace = True)
 
+sc.tl.leiden(adata)
 #visualize the louvain clusters
-sc.pl.umap(adata, color=['louvain'])
+sc.pl.umap(adata, color=['leiden'])
 
 ##############################################################################
 # visualization of marker genes
@@ -62,14 +72,14 @@
 # be demonstrated in the rest of this tutorial.
 
 #define high-level celltype annotation
-new_labels = ["mixed", #0
-              "mixed", #1
-              "CD14+ monocyte", #2
-              "mixed", #3
-              "B-cell", #4
+new_labels = ["Tcells", #0
+               "CD14+ monocyte", #1
+              "mixed", #2
+              "Bcells", #3
+              "Tcells", #4
               "FCGR3A+ monocyte", #5
-              "mixed", #6
-              "pDC"] #7
+              "pDC", #6
+              "Tcells"] #7
 
 bc.tl.annotate_cells_clustering(adata, new_labels)
 
@@ -83,28 +93,35 @@
 # reclustering on mixed cell clusters
 # -----------------------------------
 
+
 #perform reclustering on subset using besca function
-adata_subset = bc.tl.rc.recluster(adata, cluster=('0', '1', '3', '6'), resolution = 1.3)
+adata_subset = bc.tl.rc.recluster(adata, celltype =  ('mixed',"Tcells" ), celltype_label= "celltype",  resolution = 1.3)
 
 #visualize important marker genes in reclustering
-sc.pl.umap(adata_subset, color = ['louvain', 'CD3G', 'CD8A', 'CD4', 'IL7R', 'NKG7', 'GNLY'], ncols = 3)
+sc.pl.umap(adata_subset, color = ['leiden', 'CD3G', 'CD8A', 'CD4', 'IL7R', 'NKG7', 'GNLY'], ncols = 3)
 
-#annotate celltypes based on the new louvain clusters
-new_labels = ["CD4 T-cell", #0
+#annotate celltypes based on the new leiden clusters
+new_labels = ["NK cell",#0
               "CD4 T-cell", #1
               "CD4 T-cell", #2
-              "CD8 T-cell", #3
-              "NK cell", #4
+              "CD4 T-cell", #3
+              "CD4 T-cell", #4
               "CD8 T-cell", #5
-              "CD8 T-cell",#6
-              "CD4 T-cell", #7
-              "CD4 T-cell", #8
-              "CD4 T-cell", #9
-              "CD4 T-cell"] #10
+              "CD4 T-cell",#6
+              "CD8 T-cell",#7
+              "CD4 T-cell",#8
+              "CD4 T-cell",#9
+              "CD4 T-cell",#10
+              "CD4 T-cell",#11
+              "CD4 T-cell",#12              
+              "NK cell" #13
+
+              ] 
+
 
 #merge new celllabels back into the original adata object containing all cells
 #Note: this will overwrite the labels contained in adata.obs.celltype! If you w
 bc.tl.rc.annotate_new_cellnames(adata, adata_subset, names=new_labels)
 
 #visualize finished celltype annotation
-sc.pl.umap(adata, color = ['celltype'])
+sc.pl.umap(adata, color = ['celltype'])
diff --git a/besca/export/_export.py b/besca/export/_export.py
@@ -947,9 +947,9 @@ def ranked_genes(adata,
                  additional_geneannotation = 'ENSEMBL'):
     """export marker genes for each cluster to .gct file
 
-    This function exports the results of scanpy.api.tl.rank_genes_groups() on your AnnData object to a .gct
+    This function exports the results of scanpy.tl.rank_genes_groups() on your AnnData object to a .gct
     file. This file can easily be uploaded into the scsqe database since it follows the FAIR data
-    formats. 
+    formats. It expects the label "rank_genes_groups" and not a personalized one.
 
     A prerequisit for executing this function is that sc.tl.rank_genes_groups() has already been run.
     Through the variables geneannotation and additional_geneannotation you can specify the type of
@@ -959,7 +959,7 @@ def ranked_genes(adata,
     parameters
     ----------
     adata:
-        AnnData object on which scanpy.api.tl.rank_genes_groups has been executed
+        AnnData object on which scanpy.tl.rank_genes_groups has been executed
     type: `str` | 'wilcox' or 't-test overest var'  or 't-test'
     outpath `str` | default = current working directory
         filepath to the directory in which the results should be outputed, if no directory is 
@@ -977,7 +977,7 @@ def ranked_genes(adata,
     if outpath is None:
         outpath = os.getcwd()
     if adata.uns.get('rank_genes_groups') is None:
-        sys.exit('need to rank genes before export, please run: scanpy.api.tl.rank_genes() before proceeding with export')
+        sys.exit('need to rank genes before export, please run: scanpy.tl.rank_genes() before proceeding with export')
     else:
         #extract relevant data from adata object
         rank_genes = adata.uns['rank_genes_groups']

diff --git a/besca/pl/_filter_threshold_plots.py b/besca/pl/_filter_threshold_plots.py
@@ -309,7 +309,7 @@ def max_mito (adata,
               species = 'human',
               copy = False,
               ax = None):
-    """visulize maximum mitochondrial gene percentage threshold.
+    """visualize maximum mitochondrial gene percentage threshold.
 
     this function generates a knee-plot visualizing a given min_cells cutoff when given an adata object