-
Notifications
You must be signed in to change notification settings - Fork 5
/
permutations.Rmd
116 lines (94 loc) · 3.31 KB
/
permutations.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
---
title: "Analysis of the permutations of re-analysis flags"
output: html_document
---
```{bash}
# Run the permutation generator non-interactively. --vanilla starts R without
# restoring or saving a workspace and without site/user startup files.
Rscript --vanilla generate_permutations.R
```
Once the permutations have been generated, we must analyze them. Clear the flags and set the working directory to the permutations root:
```{r, collapse=TRUE}
source("implementation.R")
# Re-analysis flags, all reset to 0. The vector is first filled with the flag
# names and then turned into a named vector of zeros.
flags = c(
  "REMOVE_DUPLICATES",
  "REMOVE_TYPESCRIPT",
  "REMOVE_V8",
  "USE_AUTHORS_INSTEAD_COMMITTERS",
  "UNCERTAINTY"
)
flags = setNames(numeric(length(flags)), flags)

# Root directory that is scanned for permutation sub-directories.
WORKING_DIR = "./artifact/permutations"
```
First, get the permutation results calculated previously:
- `est` = value of the estimate
- `se` = standard error
- `pv` = `NA` if the result was not significant, the p-value limit otherwise
For the bootstrap, we get `est` and `se`, plus `sig` and `sigCons`, which are boolean vectors telling us whether the results were significant.
```{r}
# Load the per-language result tables of a single permutation.
#
# Arguments:
#   b           - unused; kept so callers that pass a baseline keep working.
#                 It is never evaluated.
#   name        - label stored in the returned list.
#   permutation - path of the permutation directory; the tables are read from
#                 its "Data" sub-directory.
#
# Returns a list with elements `name`, `weighted`, `zeroSum` and `bootstrap`.
# Each table is a data.frame whose row names are taken from the CSV column `X`
# (the name read.csv assigns to an unnamed first column); that column is then
# dropped. `bootstrap` is NULL when languages_bootstrap.csv does not exist.
# Stops with an error if a table that is read has no `X` column.
loadPermutation = function(b, name, permutation) {
  # Read one table from <permutation>/Data, promote `X` to row names, drop it.
  readTable = function(fileName) {
    path = file.path(permutation, "Data", fileName)
    tbl = read.csv(path)
    if (!"X" %in% names(tbl))
      stop("Missing row-name column 'X' in ", path, call. = FALSE)
    rownames(tbl) = tbl$X
    # drop = FALSE keeps a data.frame even when a single column remains
    tbl[, names(tbl) != "X", drop = FALSE]
  }
  bootstrapFilename = file.path(permutation, "Data", "languages_bootstrap.csv")
  # and create the result list
  list(
    name = name,
    weighted = readTable("languages_weighed.csv"),
    zeroSum = readTable("languages_zeroSum.csv"),
    # the bootstrap table is optional
    bootstrap = if (file.exists(bootstrapFilename)) {
      readTable("languages_bootstrap.csv")
    } else {
      NULL
    }
  )
}
```
Let's now look at all permutations and load them:
```{r}
# Load every permutation stored directly under `baseDir`.
#
# Arguments:
#   baseDir - directory whose immediate sub-directories are scanned.
#   b       - passed through to loadPermutation(); defaults to the global
#             `baseline`.
#
# A sub-directory counts as a permutation only when its name has exactly
# length(flags) characters. A permutation whose loading raises an error is
# reported on the console and skipped. Prints the number of permutations
# loaded and returns them as a list keyed by directory name.
loadAllPermutations = function(baseDir, b = baseline) {
  result = list()
  for (d in list.dirs(baseDir, recursive = FALSE)) {
    # the permutation name is the last component of the directory path
    pName = basename(d)
    if (nchar(pName) != length(flags))
      next
    tryCatch({
      result[[pName]] = loadPermutation(b, pName, d)
    }, error = function(e) {
      print(e)
      cat("Permutation ", pName, "FAIL\n")
    })
  }
  # end the summary with a newline so later output starts on its own line
  cat("Total valid permutations: ", length(result), "\n")
  result
}
# Load all permutations found under the working directory.
permutations = loadAllPermutations(baseDir = WORKING_DIR)
```
Let's now create the skeleton for the RQ1 table in the paper:
```{r}
# Start from the FSE and CACM baseline tables and keep only their coefficient
# and p-value columns.
result = baselineFSE_RQ1()
result = mergeDataFrames(result, baselineCACM_RQ1())
result = dplyr::select(result, FSE_coef, FSE_pv, CACM_coef, CACM_pv)

# Repetition columns: weighted table of permutation "00000".
repetition = dplyr::select(
  permutations[["00000"]]$weighted,
  repetition_coef = coef,
  repetition_pv = pVal
)
result = mergeDataFrames(result, repetition)

# Cleaned columns: weighted table of permutation "11111", including the
# `pVal_fdr` and `pVal_bonf` columns.
cleaned = dplyr::select(
  permutations[["11111"]]$weighted,
  clean_coef = coef,
  clean_pv = pVal,
  adjusted_fdr = pVal_fdr,
  adjusted_bonf = pVal_bonf
)
result = mergeDataFrames(result, cleaned)

# Zero-sum columns of permutation "11111"; the p-value shown is `pVal_bonf`.
zeroSum = dplyr::select(
  permutations[["11111"]]$zeroSum,
  zeroSum_coef = coef,
  zeroSum_pv = pVal_bonf
)
result = mergeDataFrames(result, zeroSum)

# Bootstrap columns of permutation "11111"; significance comes from `sigCons`.
bootstrap = dplyr::select(
  permutations[["11111"]]$bootstrap,
  bootstrap_coef = coef,
  bootstrap_sig = sigCons
)
result = mergeDataFrames(result, bootstrap)

# Show the assembled table.
result
```
And output it to the RQ1 table:
```{r}
# Emit the repetition and reanalysis variants of the RQ1 table from `result`.
# NOTE(review): both helpers are not defined in this file; presumably they come
# from implementation.R - confirm where they write their output.
output_RQ1_table_repetition(result)
output_RQ1_table_reanalysis(result)
```
```{r}
# Drop the working-directory variable now that the analysis is finished.
rm(WORKING_DIR)
```