-
Notifications
You must be signed in to change notification settings - Fork 0
/
cluster_metric_graphs.R
executable file
·127 lines (110 loc) · 6.39 KB
/
cluster_metric_graphs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/mlab/data/software/R-3.2.1-F22/bin/Rscript
##!/usr/bin/Rscript
# Create multiple k-graphs.
# We need a graph examining cluster distances, Jaccard similarity, and the rho and p-values of
# structural vs. functional distances. We want these in a single graphic that makes it easy to
# compare across num_ligands and metric.
library(ggplot2)
library(Cairo)
#setwd("../output")
# The first trailing command-line argument is the directory that contains
# four_measures_over_k.RData; the PNG files are written there as well because
# every later path is relative to the working directory.
args <- commandArgs(trailingOnly = TRUE)
# Fail early with a usage message instead of an obscure setwd() error when the
# argument is missing or does not name a directory.
if (length(args) < 1L || !dir.exists(args[1])) {
  stop("usage: cluster_metric_graphs.R <directory containing four_measures_over_k.RData>",
       call. = FALSE)
}
setwd(args[1])
load("four_measures_over_k.RData")
# Order of the facet rows (top to bottom) shared by all three plots.
level_order <- c("jaccard", "max - sumdiff", "rho", "-1 * log10(p)")
# normal clusters
# Builds one long-format data frame (cluster, value, type, n_lig) per measure,
# stacks them, and writes a four-row faceted plot to four_measures_normal.png.
# Cluster indices are derived from the vector lengths instead of the former
# hard-coded 30 / 28, so a different range of k in the .RData file no longer
# makes data.frame() fail on mismatched lengths.
p_floor <- 1e-10  # p-values at or below this are clamped so -log10() stays finite
normal_rhop <- list("1" = rhop.1.norm, "2" = rhop.2.norm,
                    "3" = rhop.3.norm, "4" = rhop.4.norm)

normal_jaccard <- data.frame(cluster = seq_along(jaccard.norm),
                             value = jaccard.norm,
                             type = "jaccard", n_lig = "all")
# sumdiff is plotted flipped, as (max - sumdiff)
normal_sumdiff <- data.frame(cluster = seq_along(sumdiff.norm),
                             value = max(sumdiff.norm) - sumdiff.norm,
                             type = "max - sumdiff", n_lig = "all")

# The rho / p_value vectors start at cluster 3, hence the + 2 offset.
normal_rho_0 <- do.call(rbind, lapply(names(normal_rhop), function(lig) {
  rho <- normal_rhop[[lig]]$rho
  data.frame(cluster = seq_along(rho) + 2L, value = rho,
             type = "rho", n_lig = lig)
}))
# rho restricted to cluster >= 5; kept for reference, not part of the plot below
normal_rho <- normal_rho_0[normal_rho_0$cluster >= 5, ]

# The previously computed min_p was never read, so it is no longer calculated.
normal_p <- do.call(rbind, lapply(names(normal_rhop), function(lig) {
  p <- normal_rhop[[lig]]$p_value
  data.frame(cluster = seq_along(p) + 2L,
             value = -1 * log10(pmax(p, p_floor)),
             type = "-1 * log10(p)", n_lig = lig)
}))

normal_metrics <- rbind(normal_jaccard, normal_sumdiff, normal_rho_0, normal_p)
normal_metrics$type <- ordered(normal_metrics$type, levels = level_order)

Cairo(file = "four_measures_normal.png", type = "png")
# print() explicitly so the plot is also drawn when the script is source()d,
# where top-level expressions are not auto-printed.
print(
  ggplot(normal_metrics, aes(x = cluster, y = value, color = n_lig)) +
    geom_point() +
    geom_line() +
    facet_grid(type ~ ., scales = "free_y") +
    ggtitle("normal")
)
dev.off()
### compressed clusters
###
# Builds one long-format data frame (cluster, value, type, n_lig) per measure,
# stacks them, and writes a four-row faceted plot to
# four_measures_compressed.png. All cluster indices come from vector lengths.
p_floor <- 1e-10  # p-values at or below this are clamped so -log10() stays finite
compressed_rhop <- list("1" = rhop.1.comp, "2" = rhop.2.comp,
                        "3" = rhop.3.comp, "4" = rhop.4.comp)

compressed_jaccard <- data.frame(cluster = seq_along(jaccard.comp),
                                 value = jaccard.comp,
                                 type = "jaccard", n_lig = "all")
# sumdiff is plotted flipped, as (max - sumdiff)
compressed_sumdiff <- data.frame(cluster = seq_along(sumdiff.comp),
                                 value = max(sumdiff.comp) - sumdiff.comp,
                                 type = "max - sumdiff", n_lig = "all")

# The rho / p_value vectors start at cluster 3, hence the + 2 offset.
compressed_rho_0 <- do.call(rbind, lapply(names(compressed_rhop), function(lig) {
  rho <- compressed_rhop[[lig]]$rho
  data.frame(cluster = seq_along(rho) + 2L, value = rho,
             type = "rho", n_lig = lig)
}))
# rho restricted to cluster >= 5; kept for reference, not part of the plot
# below. Derived from compressed_rho_0 so it no longer assumes 28 rho values.
compressed_rho <- compressed_rho_0[compressed_rho_0$cluster >= 5, ]

# Clamp tiny / zero p-values exactly as the normal and combined sections do;
# without the clamp a p-value of 0 becomes Inf after -log10().
compressed_p <- do.call(rbind, lapply(names(compressed_rhop), function(lig) {
  p <- compressed_rhop[[lig]]$p_value
  data.frame(cluster = seq_along(p) + 2L,
             value = -1 * log10(pmax(p, p_floor)),
             type = "-1 * log10(p)", n_lig = lig)
}))

compressed_metrics <- rbind(compressed_jaccard, compressed_sumdiff,
                            compressed_rho_0, compressed_p)
compressed_metrics$type <- ordered(compressed_metrics$type, levels = level_order)

Cairo(file = "four_measures_compressed.png", type = "png")
# print() explicitly so the plot is also drawn when the script is source()d,
# where top-level expressions are not auto-printed.
print(
  ggplot(compressed_metrics, aes(x = cluster, y = value, color = n_lig)) +
    geom_point() +
    geom_line() +
    facet_grid(type ~ ., scales = "free_y") +
    ggtitle("Compressed")
)
dev.off()
### combined cluster measures
# Builds one long-format data frame (cluster, value, type, n_lig) per measure,
# stacks them, and writes a four-row faceted plot to four_measures_combined.png.
# Cluster indices are derived from the vector lengths instead of the former
# hard-coded 30 / 28, so a different range of k in the .RData file no longer
# makes data.frame() fail on mismatched lengths.
p_floor <- 1e-10  # p-values at or below this are clamped so -log10() stays finite
all_rhop <- list("1" = rhop.1.all, "2" = rhop.2.all,
                 "3" = rhop.3.all, "4" = rhop.4.all)

all_jaccard <- data.frame(cluster = seq_along(jaccard.all),
                          value = jaccard.all,
                          type = "jaccard", n_lig = "all")
# sumdiff is plotted flipped, as (max - sumdiff)
all_sumdiff <- data.frame(cluster = seq_along(sumdiff.all),
                          value = max(sumdiff.all) - sumdiff.all,
                          type = "max - sumdiff", n_lig = "all")

# The rho / p_value vectors start at cluster 3, hence the + 2 offset.
all_rho_0 <- do.call(rbind, lapply(names(all_rhop), function(lig) {
  rho <- all_rhop[[lig]]$rho
  data.frame(cluster = seq_along(rho) + 2L, value = rho,
             type = "rho", n_lig = lig)
}))
# rho restricted to cluster >= 5; kept for reference, not part of the plot below
all_rho <- all_rho_0[all_rho_0$cluster >= 5, ]

# The previously computed min_p was never read, so it is no longer calculated.
all_p <- do.call(rbind, lapply(names(all_rhop), function(lig) {
  p <- all_rhop[[lig]]$p_value
  data.frame(cluster = seq_along(p) + 2L,
             value = -1 * log10(pmax(p, p_floor)),
             type = "-1 * log10(p)", n_lig = lig)
}))

all_metrics <- rbind(all_jaccard, all_sumdiff, all_rho_0, all_p)
all_metrics$type <- ordered(all_metrics$type, levels = level_order)

Cairo(file = "four_measures_combined.png", type = "png")
# print() explicitly so the plot is also drawn when the script is source()d,
# where top-level expressions are not auto-printed.
# The title previously read "comined" (typo).
print(
  ggplot(all_metrics, aes(x = cluster, y = value, color = n_lig)) +
    geom_point() +
    geom_line() +
    facet_grid(type ~ ., scales = "free_y") +
    ggtitle("combined")
)
dev.off()