forked from heyuan7676/COVID-19
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_tissue_wise_TPM.R
33 lines (26 loc) · 1.34 KB
/
generate_tissue_wise_TPM.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
source("utils.R")
dir.create(file.path(datadir, "tissue_tpm/"), showWarnings = FALSE)
tpm.whole = fread(paste0(datadir, 'gene_pc_lc_tpm.txt'))
for (tissue in sort(unique(samples$SMTSD))){
print(tissue)
# sample covariates
sample_in_the_tissue = samples %>% filter(SMTSD == tissue)
# restrict to the samples used in GTEx
tis = gsub(" ", "_", gsub('\\)', '', gsub(' \\(', '_', gsub(' - ', '_', tissue))))
genotype_PCs = tryCatch(read.table(paste0(datadir, 'GTEx_Analysis_v8_eQTL_covariates/',tis,'.v8.covariates.txt'),
sep='\t', header = T, stringsAsFactors = F, row.names = 1), warning = function (w) {print(paste("No data available for tissue type", tis))}, error = function(f) {return("failed")})
if(inherits(genotype_PCs, "character")){
print(paste(" ", "Skipping tissue", tis))
next
}
samples_used = gsub("\\.", "-", colnames(genotype_PCs))
sample_in_the_tissue = sample_in_the_tissue %>% filter(SUBJID %in% samples_used)
# gene TPM
gene_tpm_in_the_tissue = tpm.whole[,.SD, .SDcols=sample_in_the_tissue$SAMPID]
gene_tpm_in_the_tissue = as.data.frame(gene_tpm_in_the_tissue)
rownames(gene_tpm_in_the_tissue) = tpm.whole$Name
write.table(gene_tpm_in_the_tissue,
paste0(datadir, 'tissue_tpm/', tis, '_gene_TPM.txt'),
sep='\t')
rm(gene_tpm_in_the_tissue)
}