# config.yaml

# Path to the sheet describing meta-information for each cell.
# The first column has to contain the cell id; all other columns are optional.
# They are added as metadata to the SingleCellExperiment object created in the
# workflow.
cells: cells.tsv

counts:
  # Path to the count table (rows: genes/transcripts/spikes, columns: cells).
  path: counts.tsv
  # Kind of feature identifier used in each row. Must be a term understood by
  # Ensembl BioMart (e.g. ensembl_gene_id, hgnc_symbol).
  feature_ids: ensembl_gene_id
  # BioMart host to use (e.g. www.ensembl.org, useast.ensembl.org, ...).
  biomart: useast.ensembl.org

# Variables to use for batch effect removal and as factors to be ignored in
# variance analysis. This should always include the two cell cycle phases
# (G1, G2M), which are determined automatically during analysis. In addition,
# you may add any column name of the cell sheet (see "cells" above).
model:
  # R formula representing known batch effects.
  design: "~ G1 + G2M + detection_rate"
  # Rely on spike-ins for variance model estimation.
  # This is usually not desired because spike-ins are few and noisy.
  # If this is set to false, endogenous genes are used instead, under the
  # assumption that most of them are not differentially expressed.
  use-spikes: false
  # Minimum value of average difference in true (biological) log2 expression
  # between any two cells.
  min-bio-comp: 0.5
  # FDR threshold for selecting highly variable genes/transcripts (HVGs).
  fdr: 0.05
  # Show the expression distribution of the top n HVGs.
  show-n: 20
  # Compute correlation for the top n HVGs.
  top-n: 200
  # Expressions below this logcount are considered as dropouts
  # (logcounts are log2 transformed, including 1 pseudocount, normalized,
  # and batch effect corrected).
  # This is currently only used for gene-vs-gene plots (see below).
  dropout-threshold: 1

# Comment out this section to skip cell type assignment via signatures.
celltype:
  # Table describing markers for assignment of cell types.
  # Columns: name (cell type name), parent (parent cell type name),
  #          genes (comma-separated list of gene names/ids, as listed in the
  #          count matrix)
  # The parent column is usually empty. If it is not, assignment for that
  # type happens recursively, only on those cells that have been assigned to
  # the parent type.
  markers: resources/markers.tsv
  # Minimum gamma score for an assigned cell type (resembles a posterior) to be
  # considered as correctly assigned. Cells where the certainty of cellassign
  # does not pass this threshold will show as celltype=NA.
  min_gamma: 0.9
  # Genes for which to create expression plots stratified by cell type.
  # This can be used to find the right selection of marker genes for cellassign.
  expression-plot-genes:
    - GBP1
    - MYCN

# Gene-vs-gene plots. NOTE(review): each key below (here: all-malignant)
# appears to be a freely chosen name for one plot group — confirm against the
# workflow.
gene-vs-gene-plots:
  all-malignant:
    # Correlation of the given type to compute (pearson, spearman, ...).
    # Comment out to skip the correlation.
    correlation: spearman
    # Uncomment below to perform a regression with the given formula.
    # regression: "y ~ x"
    # Constrain to cells of the following types (comment out if not needed).
    constrain-celltypes:
      - Malignant
    # Gene pairs to plot. NOTE(review): presumably x is paired with each gene
    # listed under y — confirm against the workflow.
    pairs:
      x: DDX58
      y:
        - IRF1
        - CCL5
        - CXCL10


# Comment out this section to skip differential expression analysis.
diffexp:
  # Add one entry per comparison here. The key below can be an arbitrary name.
  a-vs-b:
    # EdgeR design formula.
    # Refer to any colData from SingleCellExperiment here.
    # In addition, you can use celltype and detection_rate
    # (number of expressed genes in cell divided by total
    # number of genes in experiment).
    design: "~ test.condition"
    # Which coefficients of the model should be tested equal to zero.
    # E.g., 2 to test the first coefficient after the implicit intercept
    # (i.e., the one belonging to test.condition in the design above).
    coef: 2
    # False discovery rate to control for.
    fdr: 0.05
    # Optional: constrain to cell types (comment out to use all cell types).
    constrain-celltypes:
      celltypes:
        - Endothelial-cell
      # Optional: constrain cells to those with the given covariate occurring in
      # all cell types (uncomment if needed).
      # This can be used to avoid confounding of an important batch variable.
      # E.g., if you want the differential expression across cell types, and
      # sample is a batch variable to control for, you need to ensure that each
      # sample contains all considered cell types.
      # common: sample
    # Genes to plot.
    genes_of_interest:
      - MITF
      - MYCN


# Species of the analyzed samples. NOTE(review): the accepted values are not
# visible in this file — check the workflow before changing. The example gene
# symbols used elsewhere in this file (e.g. MYCN, DDX58) follow human-style
# capitalization; confirm that this setting matches your data.
species: mouse

spike-ins:
  # Regular expression pattern used to detect spike-ins. NOTE(review):
  # presumably matched against the feature names of the count table — confirm.
  pattern: "^ERCC"

filtering:
  # Remove all genes with a mean count less than the given threshold.
  min-avg-count: 1