Skip to content

Commit

Permalink
Merge pull request #111 from bedapub/Update-Documentation
Browse files Browse the repository at this point in the history
Update documentation
  • Loading branch information
ajulienla authored Nov 24, 2020
2 parents 4c78e2a + 7a149f7 commit 0ae87c3
Show file tree
Hide file tree
Showing 120 changed files with 1,496 additions and 669 deletions.
9 changes: 5 additions & 4 deletions besca/_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,11 @@ def get_raw(adata):


def get_ameans(adata,mycat, condition=None):
""" Calculates average and fraction expression per category in adata.obs
"""Calculates average and fraction expression per category in adata.obs
Based on arithmetic mean expression and fraction cells expressing gene per category
(works on linear scale). Assumes that values in .raw are log: will exponentiate,
calculate mean and log back.
parameters
----------
adata: AnnData
Expand Down Expand Up @@ -237,8 +238,8 @@ def formatmean(average_obs, fraction_obs, what, mycond, myg):



def get_means(adata,mycat, condition=None):
""" Calculates average and fraction expression per category in adata.obs
def get_means(adata, mycat, condition=None):
"""Calculates average and fraction expression per category in adata.obs
Based on an AnnData object and an annotation category (e.g. louvain) returns
geometric mean expression if .raw values are log as it simply calculates mean
of whatever values are stored in .raw. Also returns fraction cells expressing gene per category.
Expand Down Expand Up @@ -292,7 +293,7 @@ def get_means(adata,mycat, condition=None):


def concate_adata(adata1, adata2):
"""concatenate two adata objects based on the observations
"""Concatenate two adata objects based on the observations
this function also merges the objects saved in .raw and generates a new combined.raw.
The obs from adata1 are preserved. Those from adata2 are lost.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
visualize cell fractions
Visualize cell fractions
========================
This example demonstrates how to generate celltype quantification plots. These types of plots
Expand Down
2 changes: 1 addition & 1 deletion besca/examples/gallery_examples/plotting/plot_filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
max_mito = 0.05
max_genes = 1900

#visualize filtering thresholds
#Visualize filtering thresholds
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6))= plt.subplots(ncols=3, nrows=2)
fig.set_figwidth(15)
fig.set_figheight(8)
Expand Down
23 changes: 23 additions & 0 deletions besca/examples/gallery_examples/plotting/plot_riverplot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
Comparing categorical variables
===============================

This example shows you how to generate riverplots to compare categorical columns,
for example to compare multiple annotations.
This way you can easily check (visually) for discrepancies.
"""


import besca as bc

#import data
adata = bc.datasets.Baron2016_processed()

###############################################################################
# compare two categories: annotations made by different annotators
# ----------------------------------------------------------------

# NOTE(review): 'assigned_cluster' and 'celltype2' are presumably the two
# adata.obs annotation columns being compared -- confirm against the dataset.
bc.pl.riverplot_2categories(adata, [ 'assigned_cluster', 'celltype2'])

Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"""

import besca as bc
import scanpy.api as sc
import scanpy as sc
import matplotlib.pyplot as plt

#load example dataset
Expand All @@ -28,7 +28,7 @@
#
# First the chosen thresholds are visualized to ensure that a suitable cutoff has been chosen.

#visualize filtering thresholds
#Visualize filtering thresholds
fig, ((ax1, ax2, ax3), (ax4, ax5, ax6))= plt.subplots(ncols=3, nrows=2)
fig.set_figwidth(15)
fig.set_figheight(8)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@
"""

import besca as bc
import scanpy.api as sc
import scanpy as sc

#import example dataset that has previously been filtered
adata = bc.datasets.pbmc3k_filtered()
adata
## We get the raw matrix containing all the initial genes, keeping the filtering on the cells
adata = bc.get_raw(adata)

###############################################################################
# highly variable gene selection
Expand Down Expand Up @@ -71,7 +72,7 @@
# leiden clustering
# -----------------

sc.tl.louvain(adata, random_state=random_seed)
sc.tl.leiden(adata, random_state=random_seed)

###############################################################################
# UMAP and t-SNE generation
Expand All @@ -87,5 +88,6 @@
# visualize the results
# ---------------------

sc.pl.umap(adata, color = ['louvain'])
sc.pl.tsne(adata, color = ['louvain'])
sc.pl.umap(adata, color = ['leiden'])
sc.pl.tsne(adata, color = ['leiden'])

39 changes: 24 additions & 15 deletions besca/examples/gallery_examples/tools/plot_reclustering_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,38 +4,47 @@
This example demonstrates how to perform a reclustering on a selected subset of
louvain clusters. You will want to do this for example during the process of celltype
annotation, when the louvain clusters do not have a sufficient resolution to seperate
annotation, when the clusters do not have a sufficient resolution to separate
all clusters and mixed cell populations still exist.
"""

import besca as bc
import scanpy.api as sc
import scanpy as sc

#load and preprocess data (here we will start from a preprocessed dataset)
adata = bc.datasets.pbmc3k_processed()

#extract subset using the recluster function which is part of the reclustering (rc) toolkit
adata_subset = bc.tl.rc.recluster(adata, celltype=('0', '1', '3', '6'), celltype_label = 'louvain', resolution = 1.3)
adata_subset = bc.tl.rc.recluster(adata, celltype=('2', '3', '4', '5', '6','8', '9', '10', '11', '12'), celltype_label = 'leiden', resolution = 1.2)




#visualize the new clusters
sc.pl.umap(adata_subset, color = ['louvain', 'CD3G', 'CD8A', 'CD4', 'IL7R', 'NKG7', 'GNLY'])
sc.pl.umap(adata_subset, color = ['leiden', 'CD3G', 'CD8A', 'CD4', 'IL7R', 'NKG7', 'GNLY'])

#append new celltype labels to the subclusters
new_labels = ["CD4 T-cell", #0
#append new celltype labels to the subclusters.
# This is an approximate manual annotation that should be refined in more depth.
new_labels = ["NK cell", #0
"CD4 T-cell", #1
"CD4 T-cell", #2
"CD8 T-cell", #3
"NK cell", #4
"CD8 T-cell", #2
"CD4 T-cell", #3
"CD8 T-cell", #4
"CD8 T-cell", #5
"CD8 T-cell",#6
"CD4 T-cell", #7
"CD4 T-cell", #8
"CD4 T-cell", #9
"CD4 T-cell"] #10
"CD4 T-cell", #6
"CD4 T-cell", #7
"CD4 T-cell", #8
"CD4 T-cell", #9
"CD4 T-cell", #10
"CD4 T-cell", #11
"CD4 T-cell" #12
] #10

#merge the labels back into the original adata object
#note this will overwrite what ever was saved in adata.obs.celltype
#note this will overwrite whatever was saved in adata.obs.celltype;
#here it was not assigned yet.
bc.tl.rc.annotate_new_cellnames(adata, adata_subset, names=new_labels, new_label = 'celltype')

print(adata.obs.celltype.value_counts())

Original file line number Diff line number Diff line change
@@ -1,24 +1,34 @@
"""
annotate celltypes
Annotate celltypes
==================
An example workflow using the PBMC3k dataset included with besca illustrating how to annotate celltypes based on louvain clusters.
This workflow begins with a preprocessed and filtered dataset on which a louvain clustering was already performed.
An example workflow using the PBMC3k dataset included with besca illustrating how to annotate celltypes based on leiden clusters.
This workflow begins with a preprocessed and filtered dataset.
Please refer to other tutorials on how to perform these steps.
This shows how to display different marker genes, assign the clusters and, if need be, recluster mixed clusters.
For PBMC datasets we advise the user to work with the sig-annot or auto-annot procedures, which are automated, less error-prone, and
allow for standardized annotations across datasets.
This is well illustrated in the tutorials (see Notebook 2).
"""
import random

#load libraries
import besca as bc
import scanpy.api as sc
import besca as bc
import scanpy as sc

random.seed(1)
#load preprocessed dataset (included in BESCA for demonstration purposes)
adata = bc.datasets.pbmc3k_processed()
adata = bc.datasets.pbmc3k_filtered()

#need to drop celltype annotation stored in this dataset (only relevant for this tutorial)
adata.obs.drop(columns = ['celltype'], inplace = True)
adata.obs.drop(columns = ['leiden'], inplace = True)

sc.tl.leiden(adata)
#visualize the leiden clusters
sc.pl.umap(adata, color=['louvain'])
sc.pl.umap(adata, color=['leiden'])

##############################################################################
# visualization of marker genes
Expand Down Expand Up @@ -62,14 +72,14 @@
# be demonstrated in the rest of this tutorial.

#define high-level celltype annotation
new_labels = ["mixed", #0
"mixed", #1
"CD14+ monocyte", #2
"mixed", #3
"B-cell", #4
new_labels = ["Tcells", #0
"CD14+ monocyte", #1
"mixed", #2
"Bcells", #3
"Tcells", #4
"FCGR3A+ monocyte", #5
"mixed", #6
"pDC"] #7
"pDC", #6
"Tcells"] #7

bc.tl.annotate_cells_clustering(adata, new_labels)

Expand All @@ -83,28 +93,35 @@
# reclustering on mixed cell clusters
# -----------------------------------


#perform reclustering on subset using besca function
adata_subset = bc.tl.rc.recluster(adata, cluster=('0', '1', '3', '6'), resolution = 1.3)
adata_subset = bc.tl.rc.recluster(adata, celltype = ('mixed',"Tcells" ), celltype_label= "celltype", resolution = 1.3)

#visualize important marker genes in reclustering
sc.pl.umap(adata_subset, color = ['louvain', 'CD3G', 'CD8A', 'CD4', 'IL7R', 'NKG7', 'GNLY'], ncols = 3)
sc.pl.umap(adata_subset, color = ['leiden', 'CD3G', 'CD8A', 'CD4', 'IL7R', 'NKG7', 'GNLY'], ncols = 3)

#annotate celltypes based on the new louvain clusters
new_labels = ["CD4 T-cell", #0
#annotate celltypes based on the new leiden clusters
new_labels = ["NK cell",#0
"CD4 T-cell", #1
"CD4 T-cell", #2
"CD8 T-cell", #3
"NK cell", #4
"CD4 T-cell", #3
"CD4 T-cell", #4
"CD8 T-cell", #5
"CD8 T-cell",#6
"CD4 T-cell", #7
"CD4 T-cell", #8
"CD4 T-cell", #9
"CD4 T-cell"] #10
"CD4 T-cell",#6
"CD8 T-cell",#7
"CD4 T-cell",#8
"CD4 T-cell",#9
"CD4 T-cell",#10
"CD4 T-cell",#11
"CD4 T-cell",#12
"NK cell" #13

]


#merge new cell labels back into the original adata object containing all cells
#Note: this will overwrite the labels contained in adata.obs.celltype! If you w
bc.tl.rc.annotate_new_cellnames(adata, adata_subset, names=new_labels)

#visualize finished celltype annotation
sc.pl.umap(adata, color = ['celltype'])
sc.pl.umap(adata, color = ['celltype'])
8 changes: 4 additions & 4 deletions besca/export/_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,9 +947,9 @@ def ranked_genes(adata,
additional_geneannotation = 'ENSEMBL'):
"""export marker genes for each cluster to .gct file
This function exports the results of scanpy.api.tl.rank_genes_groups() on your AnnData object to a .gct
This function exports the results of scanpy.tl.rank_genes_groups() on your AnnData object to a .gct
file. This file can easily be uploaded into the scsqe database since it follows the FAIR data
formats.
formats. It expects the label "rank_genes_groups" and not a personalized one.
A prerequisite for executing this function is that sc.tl.rank_genes_groups() has already been run.
Through the variables geneannotation and additional_geneannotation you can specify the type of
Expand All @@ -959,7 +959,7 @@ def ranked_genes(adata,
parameters
----------
adata:
AnnData object on which scanpy.api.tl.rank_genes_groups has been executed
AnnData object on which scanpy.tl.rank_genes_groups has been executed
type: `str` | 'wilcox' or 't-test overest var' or 't-test'
outpath `str` | default = current working directory
filepath to the directory in which the results should be outputed, if no directory is
Expand All @@ -977,7 +977,7 @@ def ranked_genes(adata,
if outpath is None:
outpath = os.getcwd()
if adata.uns.get('rank_genes_groups') is None:
sys.exit('need to rank genes before export, please run: scanpy.api.tl.rank_genes() before proceeding with export')
sys.exit('need to rank genes before export, please run: scanpy.tl.rank_genes() before proceeding with export')
else:
#extract relevant data from adata object
rank_genes = adata.uns['rank_genes_groups']
Expand Down
2 changes: 1 addition & 1 deletion besca/pl/_filter_threshold_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def max_mito (adata,
species = 'human',
copy = False,
ax = None):
"""visulize maximum mitochondrial gene percentage threshold.
"""visualize maximum mitochondrial gene percentage threshold.
this function generates a knee-plot visualizing a given min_cells cutoff when given an adata object
Expand Down
Loading

0 comments on commit 0ae87c3

Please sign in to comment.