-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.Rmd
125 lines (92 loc) · 4.53 KB
/
index.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
---
title: "Amino Acids 3D PCA"
output: html_document
---
```{r, warning=F, message=F, echo=F, results='hide'}
library(tidyverse)
library(RColorBrewer)
library(rebus)
library(readxl)
library(ComplexHeatmap)
library(circlize)
library(ggsci)
library(plotly)
library(ggrepel)
library(gridExtra)
library(cowplot)
library(broom)
theme_set(theme_bw() + theme(axis.text = element_text(color = "black"),
axis.title = element_text(colour = "black"),
strip.background = element_blank(),
strip.text = element_text(colour = "black",
face = "bold")))
set.seed(2019)
# Read dataset and tidy up
path = "/Users/Boyuan/Desktop/My publication/7th. HILIC amino acid & PCA/600 samples/600 SAMPLE ALL DATA.xlsx"
df.inj.conc = read_excel(path , sheet = "injected concentration") # read injection concentration dataset
df.inj.conc %>% duplicated() %>% sum() # checking duplicated rows
amino.acids = (df.inj.conc %>% colnames())[-c(1:3)] # extract all amino acids analyzed
# Read sample mass and traits dataset
df.mass = read_excel(path, sheet = "sample mass", range = "A1:V601")
# remove missing rows (labels created for non-existing samples)
df.mass = df.mass %>% filter(`Note4_missing row` != "YES")
df.mass = df.mass %>% mutate(`Mass (mg)` = as.numeric(`Mass (mg)`))
# Computate content in sample ----
df.all.data.tidy = df.inj.conc %>% left_join(df.mass, by = "Name") %>% # join datasets of content and sample info
gather(amino.acids, key = compounds, value = inj.conc) %>% # gather compounds
# content in mg / 100 g dry sample
# x 100 dil factor x 10 mL, convert to mg cmpd, then normalized by sample mass
mutate(content = inj.conc * (100 * 10) /1000/1000 / `Mass (mg)` * 1000 * 100,
Category = factor(Category, levels = c("Nightshade", "Amaranth", "Spider plant", "Mustard"),
ordered = T))
unique.compounds = df.all.data.tidy$compounds %>% unique()
## Specify Colors ----
# Manual assignment for category (most important one)
unique.categories = df.all.data.tidy$Category %>% unique()
color.category = c("Black", "Steelblue", "Firebrick" , "Darkgreen")
names(color.category) = unique.categories
df.all.data = df.all.data.tidy %>%
select(Name, Category, Species, Cultivar, Rep,# plant varieties
Year, Harvest, Season, Site, # environment
compounds, content) %>%
spread(key = compounds, value = content) # each amino acid, each column
category = df.all.data$Category %>% as.factor() # category as a factor vector
col.pairs = brewer.pal(n_distinct(category), "Set1") # colors for category
mat.content = df.all.data[, amino.acids] %>% as.matrix() # pure content matrix
rownames(mat.content) = df.all.data$Category
# Compute PCA -<>--<>--<>--<>--<>--<>--<>--<>--<>--<>--<>----- ----
mat.content.scaled = mat.content %>% scale(center = T, scale = T)
# Check eigenvalues
content.scaled.covarianceMatrix = mat.content.scaled %>% var()
eigen.values = (content.scaled.covarianceMatrix %>% eigen())$values
# contribution percentage of the major PC's
PC1.percent = (eigen.values[1] / eigen.values %>% sum() * 100) %>% round()
PC2.percent = (eigen.values[2] / eigen.values %>% sum() * 100) %>% round()
PC3.percent = (eigen.values[3] / eigen.values %>% sum() * 100) %>% round()
# Check eigenvectors
eigen.vectors = (mat.content.scaled %>% var() %>% eigen())$vectors
eigen.vectors = -eigen.vectors
rownames(eigen.vectors) = colnames(mat.content.scaled)
# Recall that eigenvector elements are the coefficients of corresponding original variables
mat.PC = mat.content.scaled %*% (eigen.vectors)
df.PC = df.all.data %>%
select(Name, Category, Species, Cultivar, Harvest, Season, Site) %>%
cbind(mat.PC[, 1:3]) %>% as_tibble() %>%
rename(PC1 = `1`, PC2 = `2`, PC3 = `3`)
df.PC %>% head(); df.PC %>% tail()
```
```{r, warning=F, message=F, echo=F, fig.height=11, fig.width=11}
# 3D PCA plot
plot_ly(df.PC, x = ~PC1, y = ~PC2, z = ~PC3, color = ~Category,
colors = color.category) %>%
add_markers() %>%
layout(
scene = list(xaxis = list(title = paste("Standardized PC1, ", PC1.percent, "% contribution"),
range = c(-6, 5)),
yaxis = list(title = paste("Standardized PC2, ", PC2.percent, "% contribution"),
range = c(-5, 5)),
zaxis = list(title = paste("Standardized PC3, ", PC3.percent, "% contribution"),
range = c(-4, 4))
)
)
```