From af0d2f5b320df61ad3d88ee960617c71e4b801b8 Mon Sep 17 00:00:00 2001
From: Ashley Richardson <richaa21@li03c02.chimera.hpc.mssm.edu>
Date: Wed, 7 Feb 2024 13:29:29 -0500
Subject: [PATCH] message

---
 Harmonized_Seurat.RMD | 500 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 500 insertions(+)
 create mode 100644 Harmonized_Seurat.RMD

diff --git a/Harmonized_Seurat.RMD b/Harmonized_Seurat.RMD
new file mode 100644
index 0000000..8b4a5ca
--- /dev/null
+++ b/Harmonized_Seurat.RMD
@@ -0,0 +1,500 @@
+---
+title: "merge_ind"
+author: "Ashley Richardson"
+date: "2024-02-01"
+output: html_document
+---
+
+```{r setup, include=FALSE}
+knitr::opts_chunk$set(echo = TRUE)
+```
+
+```{r}
+.libPaths(c("/hpc/packages/minerva-centos7/rpackages/4.3.0/site-library", "/hpc/packages/minerva-centos7/rpackages/bioconductor/3.17", .libPaths())) 
+library(Seurat)
+libraru(dyplr)
+```
+
+
+## Merge my three seurat objects. 
+pbmc.unstim - baseline pbmcs from healthy donors and PD
+pbmc.m - monomer a-syn stimulated pbmcs from healthy donors and PD
+pbmc.f - fibril a-syn stimulated pbmcs from healthy donors and PD
+```{r}
+m_2 <- merge(pbmc.unstim, y = c(pbmc.m, pbmc.f), add.cell.ids = c("u", "m","f"),
+             project = "pbmc_PD",
+             merge.data = TRUE)
+```
+
+## QC of merged object. 
+```{r}
+#Visualize QC metrics as a violin plot - should be per donor, but need demuxlet first
+VlnPlot(m_2, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = "orig.ident") #this should be by subjectID
+
+#### Preprocessing ####
+source("/sc/arion/projects/ad-omics/sc_PBMC/snake/scripts/npcs.R") #source npcs function to estimate the number of PCs to use
+source("/sc/arion/projects/ad-omics/sc_PBMC/snake/scripts/preProcessSC.R") #using function so we can call same one in next rule
+
+#Run the desired normalization, scaling, PCA, dimensional reduction
+srat.merge <- preProcessSC(m_2)
+
+#Create preprocessing plots
+pc_var = c(srat.merge@reductions[["umap"]]@misc[["n.pcs.used"]], npcs(srat.merge, reduction = "pca", var.total = 0.90),
+           npcs(srat.merge, reduction = "pca", var.total = 0.85), npcs(srat.merge, reduction = "pca", var.total = 0.80))
+
+## elbow plot to check the PCs and where 90% of variation occurs. In this case, it is at 30 PCs 
+ElbowPlot(srat.merge, ndims=50) +
+  geom_vline(xintercept = pc_var[1], color = "blue", linetype="dashed") +
+  geom_vline(xintercept = pc_var[2], color = "blue", linetype="dotted") +
+  geom_vline(xintercept = pc_var[3], color = "blue", linetype="dotted") +
+  geom_vline(xintercept = pc_var[4], color = "blue", linetype="dotted") +
+  geom_text(aes(x=pc_var[1], label="\n95% variation", y=10), colour="blue", angle=90, check_overlap = T) +
+  geom_text(aes(x=pc_var[2], label="\n90% variation", y=10), colour="blue", angle=90, check_overlap = T) +
+  geom_text(aes(x=pc_var[3], label="\n85% variation", y=10), colour="blue", angle=90, check_overlap = T) +
+  geom_text(aes(x=pc_var[4], label="\n80% variation", y=10), colour="blue", angle=90, check_overlap = T)
+
+DimHeatmap(srat.merge, dims = 1:15, cells = 500, balanced = TRUE)
+DimPlot(srat.merge, reduction = "pca")
+DimPlot(srat.merge, reduction = "umap", label = TRUE, group.by = "condition")
+
+#Save preprocessing plots
+pdf(file=paste0("plots.preprocessing.pdf"), height = 9, width = 17) 
+ep
+hmap
+pcap
+umapp
+dev.off()
+```
+
+
+## Harmonize the merged seurat object
+```{r}
+#if multiple covariates were provided, fix formatting for use
+if(str_detect(covars, ",")){
+	covars = strsplit(covars, ",")[[1]]
+}
+
+#Run Harmony
+srat.merge <- srat.merge %>% 
+  RunHarmony(group.by.vars= "orig.ident",
+             reduction.use='pca', plot_convergence = F,
+             verbose=TRUE)
+
+n.pcs = npcs(srat.merge,reduction="harmony")
+
+srat.merge <- 
+  srat.merge %>% RunUMAP(
+    reduction = 'harmony',
+    dims = 1:n.pcs,
+    reduction.name='umap_harmony'
+  )  
+srat.merge@reductions$umap_harmony@misc$n.pcs.used <- n.pcs
+
+srat.merge <- 
+  srat.merge %>% FindNeighbors(
+    reduction = 'harmony',
+    dims = 1:n.pcs, 
+    graph.name = 'harmony_snn',
+    force.recalc = TRUE,
+    verbose = FALSE
+  )
+
+srat.merge <- FindClusters(
+  object = srat.merge, 
+  resolution = 0.4,
+  graph.name='harmony_snn'
+)
+srat.merge[['harmony_res.0.4']] <- as.numeric(srat.merge@active.ident) # save idents as numerics
+
+#Create integration plots
+##choosing number of PCs
+pc_var_harm = c(srat.merge@reductions[["umap_harmony"]]@misc[["n.pcs.used"]], npcs(srat.merge, reduction = "harmony", var.total = 0.90),
+           npcs(srat.merge, reduction = "harmony", var.total = 0.85), npcs(srat.merge, reduction = "harmony", var.total = 0.80))
+ep_harm = ElbowPlot(srat.merge, ndims=50) +
+  geom_vline(xintercept = pc_var[1], color = "blue", linetype="dashed") +
+  geom_vline(xintercept = pc_var[2], color = "blue", linetype="dotted") +
+  geom_vline(xintercept = pc_var[3], color = "blue", linetype="dotted") +
+  geom_vline(xintercept = pc_var[4], color = "blue", linetype="dotted") +
+  geom_text(aes(x=pc_var[1], label="\n95% variation", y=10), colour="blue", angle=90, check_overlap = T) +
+  geom_text(aes(x=pc_var[2], label="\n90% variation", y=10), colour="blue", angle=90, check_overlap = T) +
+  geom_text(aes(x=pc_var[3], label="\n85% variation", y=10), colour="blue", angle=90, check_overlap = T) +
+  geom_text(aes(x=pc_var[4], label="\n80% variation", y=10), colour="blue", angle=90, check_overlap = T)
+
+##UMAPs
+dp_harms <- DimPlot(object = srat.merge, reduction = "harmony", pt.size = .1, group.by = "orig.ident") + NoLegend()
+vp_harms <- VlnPlot(object = srat.merge, features = "harmony_1", group.by = "orig.ident", pt.size = .1) + NoLegend()
+qc_harm = plot_grid(dp_harms,vp_harms)
+
+dp_harm_af = DimPlot(srat.merge, reduction = "umap_harmony", group.by = "orig.ident") + plot_annotation(title = "After integration (Harmony)")
+dp_harm_bf_splt = DimPlot(srat.merge, reduction = "umap_harmony", group.by = "orig.ident", pt.size = .1, split.by = 'orig.ident') + NoLegend()
+dp_harm_bf = DimPlot(srat.merge, reduction = "umap", group.by = "orig.ident") + NoLegend() + plot_annotation(title = "Before integration (Harmony)")
+#both
+compdp = cowplot::plot_grid(ncol = 2, dp_harm_bf, dp_harm_af)
+
+#Save integration plots
+pdf(file= "plots.harmony.pdf", height = 9, width = 17) 
+ep_harm
+qc_harm
+dp_harm_bf
+dp_harm_bf_splt
+dp_harm_af
+compdp
+dev.off()
+
+#save file
+saveRDS(srat.merge, file="srat.merge.RDS")
+```
+
+```{r}
+srat.diet <- DietSeurat(srat.merge, assays = "RNA", dimreducs =c('pca','umap','umap_harmony'), graphs = 'harmony_snn')
+```
+
+## SingleR Annotation 
+Automatic annotation. Code from Mikaela's github. 
+
+```{r}
+library(SingleR)
+library(celldex)
+library(ggeasy)
+library(ggplot2)
+library(pheatmap)
+
+srat.diet <- JoinLayers(srat.diet)
+srat = GetAssayData(object = srat.diet, layer = "counts", assay = "RNA")
+ref <- HumanPrimaryCellAtlasData()
+
+#bulk prediction using Immune Cell Expression 
+pred.bulk <- SingleR(test = srat, ref = ref, labels = ref$label.fine)
+
+plotScoreHeatmap(pred.bulk)
+plotDeltaDistribution(pred.bulk, ncol = 3)
+
+srat.diet[["singleR.monaco"]] <- pred.bulk$labels
+DimPlot(srat.diet, reduction = "umap_harmony", group.by = "singleR.monaco", label = TRUE, raster = FALSE) + NoLegend()
+
+markers.df = read.csv("/sc/arion/projects/ad-omics/sc_PBMC/snake/markers.csv")
+DotPlot(srat.diet, features = na.omit(c(markers.df[,2],"CD4")), group.by = "singleR.monaco") + easy_rotate_x_labels(angle = 45, side = "right")
+
+#feature plots
+features = c("CD4","CD8A","SELL","CCR7","GZMA","GZMH","GZMK","FOXP3","TRDV1","TRDV2","TRGV9","TRAV1-2")
+FeaturePlot(srat.diet, reduction = "umap_harmony", features = features)
+```
+
+```{R}
+#counting the cell proportions
+
+d = srat.diet@meta.data[,c("seurat_clusters","singleR.monaco")]
+
+d2 = d %>% 
+  group_by(seurat_clusters) %>% 
+  mutate(total = n()) %>%
+  group_by(seurat_clusters, singleR.monaco) %>% 
+  mutate(count = n(),
+    Percentage = 100 * (count / total))
+
+d2 = unique(d2)
+ggplot(d2, aes(x = singleR.monaco, y = seurat_clusters, fill = Percentage)) + geom_tile() + geom_text(aes(label = round(Percentage, 1), color = ifelse(Percentage>4, "black", "white"))) + scale_colour_manual(values=c("white"="white", "black"="black"),guide="none") + theme_classic() + easy_rotate_x_labels(angle = 45, side = "right") + scale_fill_gradient(low="white", high="blue")
+```
+
+
+```{r}
+cells_ann <- as.data.frame(table(srat.diet@meta.data$singleR.monaco))
+colnames(cells_ann) <- c("singleR.monaco", "number of cells")
+data.frame(cells_ann)
+```
+```{r}
+DimPlot(srat.diet, reduction = 'umap_harmony', group.by = 'condition', label = TRUE, repel = TRUE) + NoLegend()
+```
+
+
+```{r}
+DimPlot(srat.diet, reduction = 'umap_harmony', group.by = 'singleR.monaco', label = TRUE, repel = TRUE) + NoLegend()
+
+```
+
+
+```{r}
+library(kableExtra)
+Idents(srat.diet) <- "seurat_clusters"
+prop.table(table(Idents(srat.diet), srat.diet$condition), margin = 2) %>%
+  kable(row.names = T) %>%
+  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
+
+```
+
+Quickly check the proportion of the clusters. 
+```{r}
+pt <- table(Idents(srat.diet), srat.diet$condition)
+pt <- as.data.frame(pt)
+pt$Var1 <- as.character(pt$Var1)
+
+library(ggplot2)
+library(RColorBrewer)
+ggplot(pt, aes(x = Var2, y = Freq, fill = Var1)) +
+  theme_bw(base_size = 15) +
+  geom_col(position = "fill", width = 0.5) +
+  xlab("Sample") +
+  ylab("Proportion") +
+  scale_fill_manual(values = brewer.pal(12, "Paired")) +
+  theme(legend.title = element_blank())
+
+```
+
+
+## Guided/Manual annotation 
+The singleR annotations arent great.. there are a lot of labels that apply to just 1-10 cells, so labeling the clusters is a little difficult. 
+Therefore, I will add manual annotations based on markers expressed in the clusters and the SingleR annotations. 
+
+```{r}
+pbmc.markers <- FindAllMarkers(srat.diet, only.pos = TRUE)
+pbmc.markers %>%
+    group_by(cluster) %>%
+    dplyr::filter(avg_log2FC > 1)
+
+pbmc.markers %>%
+    group_by(cluster) %>%
+    dplyr::filter(avg_log2FC > 1) %>%
+    slice_head(n = 10) %>%
+    ungroup() -> top10
+top10
+```
+
+
+```{r}
+FeaturePlot(srat.diet, features = c("MS4A1", "GNLY", "CD3E", "CD14", "FCER1A", "FCGR3A", "LYZ", "PPBP",
+    "CD8A"))
+
+```
+
+
+```{r}
+## To do a heatmap on the integrated seurat, I needed to subset and plot less cells for the graph to work.  
+png(filename="heatmap.top10.png", width = 1000, height = 1500)
+DoHeatmap(subset(srat.diet, downsample = 1000), 
+          features = top10$gene)
+dev.off()
+```
+
+
+
+Lets again look at the features plot in which we give a file with known features. This wil help confirm the annotations
+```{r}
+markers.df = read.csv("/sc/arion/projects/ad-omics/sc_PBMC/snake/markers.csv")
+png("knownfeature.plot.png", width = 1000, height = 1000)
+DotPlot(srat.diet, features = na.omit(markers.df[,2],"CD4")) + easy_rotate_x_labels(angle = 45, side = "right")
+dev.off()
+```
+
+I played around with this type of plot to see which markers showed up in a specific cluster. 
+```{r}
+library(ggeasy)
+markers = c("CD8A")
+DotPlot(srat.diet, features = markers, group.by = "seurat_clusters") +
+  easy_rotate_x_labels(angle = 45, side = "right") 
+```
+
+
+
+## Guided Annotation Labels
+Using the above heatmap as a guide, I will add annotations for each cluster. 
+```{r}
+srat.diet@meta.data$cell.type = as.numeric(as.character(srat.diet@meta.data$seurat_clusters))
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==0] <- "CD4+ EM T cells"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==1] <- "CD8+ T cells"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==2] <- "gd T Cells"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==3] <- "CD4+ T Cells"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==4] <- "NK Cells"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==5] <- "CD4+ CM T cells"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==6] <- "gd T Cells"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==7] <- "gd T Cells"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==8] <- "gd T Cells"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==9] <- "B cells/Undefined"
+srat.diet@meta.data$cell.type[srat.diet@meta.data$cell.type==10] <- "gd T Cells"
+```
+
+
+```{r} 
+# my manual annotation
+Idents(srat.diet) <- "cell.type"
+prop_donor <- as.data.frame(prop.table(table(Idents(srat.diet), srat.diet$condition), margin = 2))
+colnames(prop_donor) <- c("cell_type", "condition", "prop")
+
+library(ggplot2)
+library(ggeasy)
+ggplot(data = prop_donor, aes(x = condition, y = prop, fill = cell_type)) +
+  geom_bar(stat = "identity") +
+  theme_classic() +
+  easy_rotate_x_labels(angle = 45, side = "right") +
+  labs(x = "Stimulation", y = "Cell proportion") 
+```
+We can see how singleR annotations look too... not so great. 
+```{r}
+## singleR annotation 
+Idents(srat.diet) <- "singleR.monaco"
+prop_donor <- as.data.frame(prop.table(table(Idents(srat.diet), srat.diet$condition), margin = 2))
+colnames(prop_donor) <- c("cell_type", "condition", "prop")
+
+library(ggplot2)
+ggplot(data = prop_donor, aes(x = condition, y = prop, fill = cell_type)) +
+  geom_bar(stat = "identity") +
+  theme_classic() +
+  easy_rotate_x_labels(angle = 45, side = "right") +
+  labs(x = "Stimulation", y = "Cell proportion") 
+```
+
+Lets compare my annotations to SingleR anotations. 
+```{r}
+library(pheatmap)
+#make table of your two annotation columns
+d = srat.diet@meta.data[,c("cell.type","singleR.monaco")]
+#calculate frequencies 
+freqs2 <- apply(table(d), 1, function(i) i/sum(i))
+#plot
+png("annotation_comparism.png", width = 1000, height = 1000)
+pheatmap(freqs2)
+dev.off()
+```
+
+
+```{r}
+DimPlot(srat.diet, reduction = 'umap_harmony', group.by = 'cell.type', label = TRUE, repel = TRUE) + NoLegend()
+```
+
+
+## Addition of patient Meta Data. 
+
+```{r}
+metadata = read.csv("/sc/arion/projects/ad-omics/ashley/PD_Stim/Donor_MetaData_short.csv", header = T, stringsAsFactors = F, check.names = F)
+metadata <- metadata[-1]
+
+colnames(metadata)
+colnames(metadata) <- gsub(" ", "_", colnames(metadata))
+
+data_tmp = srat.diet@meta.data %>% left_join(metadata[, c("DMX_maxID", "DX", "Sex", "Age")], by="DMX_maxID")
+srat.diet[["DX"]] <- as.character(data_tmp$DX)
+srat.diet[["Sex"]] <- as.character(data_tmp$Sex)
+srat.diet[["Age"]] <- as.character(data_tmp$Age)
+srat.diet[["DMX_maxID"]] <- as.character(data_tmp$DMX_maxID)
+
+head(srat.diet@meta.data)
+```
+
+## Douplet Removal
+Douplets were analyzed in the individual Seurat Objects, but not removed. 
+Here, I will analyze them again and remove them based on demuxlet. 
+
+```{r}
+table(srat.diet@meta.data[,c("DMX_classification.global","condition")]) #number of singlets or doublets identified as each donor
+```
+
+```{r}
+table(srat.diet@meta.data[,c("DMX_classification.global","DX")]) 
+## by diagnosis
+```
+
+```{r}
+table(srat.diet@meta.data[,c("DMX_classification.global","DMX_maxID")]) 
+## by diagnosis
+```
+
+```{r}
+VlnPlot(srat.diet, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, group.by = "DMX_classification.global")
+```
+Remove douplets
+```{r}
+#douplet removal
+srat.diet <- subset(srat.diet, subset = DMX_classification.global != "DBL")
+
+#confirm that there are no doublets left 
+table(srat.diet$DMX_classification.global)
+```
+
+
+
+## Comparison of cell proportions 
+```{r}
+selected_data <- srat.diet@meta.data[,c("DX", "condition", "cell.type")]
+
+#PD & Baseline
+PD_Base <- selected_data[selected_data$DX == "PD" & selected_data$condition == "Baseline", ]
+count_table <- table(PD_Base)
+PD_Base_Prop <- as.data.frame(prop.table(count_table, margin = 2))
+#CO & Baseline
+CO_Base <- selected_data[selected_data$DX == "CO" & selected_data$condition == "Baseline", ]
+count_table2 <- table(CO_Base)
+CO_Base_Prop <- as.data.frame(prop.table(count_table2, margin = 2))
+
+#PD & Monomer
+PD_Mon <- selected_data[selected_data$DX == "PD" & selected_data$condition == "Monomer", ]
+count_table3 <- table(PD_Mon)
+PD_Mon_Prop <- as.data.frame(prop.table(count_table3, margin = 2))
+#CO & Monomer
+CO_Mon <- selected_data[selected_data$DX == "CO" & selected_data$condition == "Monomer", ]
+count_table4 <- table(CO_Mon)
+CO_Mon_Prop <- as.data.frame(prop.table(count_table4, margin = 2))
+
+#PD & Fibril
+PD_Fib <- selected_data[selected_data$DX == "PD" & selected_data$condition == "Fibril", ]
+count_table5 <- table(PD_Fib)
+PD_Fib_Prop <- as.data.frame(prop.table(count_table5, margin = 2))
+#CO & Fibril
+CO_Fib <- selected_data[selected_data$DX == "CO" & selected_data$condition == "Fibril", ]
+count_table6 <- table(CO_Fib)
+CO_Fib_Prop <- as.data.frame(prop.table(count_table6, margin = 2))
+```
+
+
+```{r}
+library(ggplot2)
+library(ggeasy)
+
+## Baseline PD vs CO
+df_baseline <- dplyr::union(CO_Base_Prop, PD_Base_Prop)
+
+ggplot(data = df_baseline, aes(x = DX, y = Freq, fill = cell.type)) +
+  geom_bar(stat = "identity") +
+  facet_wrap(~condition, ncol = 4) + 
+  theme_classic() +  
+  easy_rotate_x_labels(angle = 45, side = "right") +
+  labs(x = "Diagnosis", y = "Cell proportion") 
+
+## Control Baseline, Monomer, Fibril
+df_CO <- dplyr::union(CO_Base_Prop, CO_Mon_Prop)
+df_CO <- dplyr::union(df_CO, CO_Fib_Prop)
+ggplot(data = df_CO, aes(x = DX, y = Freq, fill = cell.type)) +
+  geom_bar(stat = "identity") +
+  facet_wrap(~condition, ncol = 4) + 
+  theme_classic() +
+  easy_rotate_x_labels(angle = 45, side = "right") +
+  labs(x = "Diagnosis", y = "Cell proportion") 
+
+## PD Baseline, Monomer, Fibril
+df_PD <- dplyr::union(PD_Base_Prop, PD_Mon_Prop)
+df_PD <- dplyr::union(df_PD, PD_Fib_Prop)
+ggplot(data = df_PD, aes(x = DX, y = Freq, fill = cell.type)) +
+  geom_bar(stat = "identity") +
+  facet_wrap(~condition, ncol = 4) + 
+  theme_classic() +
+  easy_rotate_x_labels(angle = 45, side = "right") +
+  labs(x = "Diagnosis", y = "Cell proportion") 
+
+## Everything
+df_all <- dplyr::union(df_CO, df_PD)
+ggplot(data = df_all, aes(x = DX, y = Freq, fill = cell.type, label_value(frequency(Freq)))) +
+  geom_bar(stat = "identity") +
+  facet_wrap(~condition, ncol = 4) + 
+  theme_classic() +
+  easy_rotate_x_labels(angle = 45, side = "right") +
+  labs(x = "Diagnosis", y = "Cell proportion") 
+
+```
+
+
+```{r}
+saveRDS(srat.diet, file = "/sc/arion/projects/ad-omics/ashley/PD_Stim/srat.diet.feb5.RDS")
+```
+
+
+
+
+