Skip to content

Commit

Permalink
updated simulation data
Browse files Browse the repository at this point in the history
  • Loading branch information
sgosline committed Sep 11, 2023
1 parent cd700df commit 09a3998
Show file tree
Hide file tree
Showing 7 changed files with 174 additions and 156 deletions.
63 changes: 32 additions & 31 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,34 +1,35 @@
tags
test/*
.pdf

.RData
.Rhistory
*~
.*~
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
tags
test/*
*.pdf
.RData
.Rhistory
*~
*csv
*tsv
.*~
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
Expand Down
Binary file added .gitignore.swp
Binary file not shown.
218 changes: 109 additions & 109 deletions metrics/data-sim/call-deconv-on-sim.cwl
Original file line number Diff line number Diff line change
@@ -1,109 +1,109 @@
#!/usr/bin/env cwltool
# Workflow overview (as wired in the steps below):
#   1. get-sig-mat       - fetch the named signature matrix, passing `sample` as `subsample`
#   2. get-sim-data      - fetch one simulated dataset (expression matrix + cell-type matrix)
#   3. deconv-prot       - run the chosen algorithm on the simulated matrix with that signature
#   4. match-prot-to-sig - produce updated versions of the deconvolution result and cell-type matrix
#   5. celltype-cor      - feed both updated matrices to the correlation tool
# The only published output of this version is the file emitted by celltype-cor.
class: Workflow
label: call-deconv-on-sim
id: call-deconv-on-sim
cwlVersion: v1.2

# StepInputExpressionRequirement is needed for the `valueFrom` used in celltype-cor.
requirements:
- class: SubworkflowFeatureRequirement
- class: MultipleInputFeatureRequirement
- class: InlineJavascriptRequirement
- class: StepInputExpressionRequirement

inputs:
signature: ##name of matrix to sample from
type: string
protAlg: ##algorithm to run
type: string
simulation: ## permutation to test
type: string
default: '1'
# NOTE(review): dataType is declared here but no step below references it.
dataType: ##mRNA or protein data
type: string
default: 'prot'
simType: ##data simulated from mrna or protein
type: string
default: 'prot'
sample: ##how much of the permuted sample do we test
type: int
default: 100
sampleRep: ## presumably the sampling replicate index; only forwarded to celltype-cor - TODO confirm
type: int
default: 1

# All intermediate outputs are commented out; only the correlation file is exposed.
outputs:
# matrix:
# type: File
# outputSource: get-sim-data/matrix
# cellPred:
# type: File
# outputSource: get-sim-data/cellType
# deconvoluted:
# type: File
# outputSource: deconv-prot/deconvoluted
# deconv:
# type: File
# outputSource: match-prot-to-sig/updated-deconv
cell-cor-file:
type: File
outputSource: celltype-cor/corr

steps:
# Signature matrix selected by name; `sample` is passed through as `subsample`.
get-sig-mat:
run: ../../signature_matrices/get-signature-matrix.cwl
in:
sigMatrixName: signature
subsample: sample
out:
[sigMatrix]
# Simulated data for the requested permutation and simulation type.
get-sim-data:
run: ../../simulatedData/sim-data-tool.cwl
in:
simNumber: simulation
simType: simType
out:
[matrix,cellType]
# Deconvolution of the simulated matrix using the signature from get-sig-mat.
deconv-prot:
run: ../../tumorDeconvAlgs/run-deconv.cwl
in:
alg: protAlg
signature: get-sig-mat/sigMatrix
matrix: get-sim-data/matrix
out: [deconvoluted]
# Takes the deconvolution result and the simulated cell-type matrix and returns
# an updated copy of each (presumably with harmonised cell-type names - confirm in map-sig-tool.cwl).
match-prot-to-sig:
run: ../../simulatedData/map-sig-tool.cwl
in:
deconv-matrix: deconv-prot/deconvoluted
sig-matrix: get-sig-mat/sigMatrix
deconv-type: simType
cell-matrix: get-sim-data/cellType
out: [updated-deconv,updated-cell-matrix]
# The correlation tool's `proteomics` slot receives the deconvolution result and its
# `transcriptomics` slot receives the simulated cell-type matrix; `mrnaAlg` is
# fixed to the literal "cellFraction" and `cancerType` carries the simulation id.
celltype-cor:
run: ../correlations/deconv-corrXcelltypes-cwl-tool.cwl
in:
cancerType: simulation
mrnaAlg:
valueFrom: "cellFraction"
protAlg: protAlg
signature: signature
sampleVal: sample
sampleType: simType
sampleRep: sampleRep
proteomics:
source: match-prot-to-sig/updated-deconv
transcriptomics:
source: match-prot-to-sig/updated-cell-matrix
out: [corr]
# Disabled step: matrix distance between the two updated matrices.
# matrix-distance:
# run: ../distance/deconv-comparison-tool.cwl
# in:
# matrixA: match-prot-to-sig/updated-deconv
# matrixB: match-prot-to-sig/updated-cell-matrix
# cancerType: simulation
# aAlg: protAlg
# bAlg:
# valueFrom: "cellFraction"
# signature: get-sig-mat/sigMatrix
# sampleType: simType
# out:
# [dist]
#!/usr/bin/env cwltool
# Workflow overview (as wired in the steps below):
#   1. get-sig-mat       - fetch the named signature matrix, passing `sample` as `subsample`
#   2. get-sim-data      - fetch one simulated dataset (expression matrix + cell-type matrix)
#   3. deconv-prot       - run the chosen algorithm on the simulated matrix with that signature
#   4. match-prot-to-sig - produce updated versions of the deconvolution result and cell-type matrix
#   5. celltype-cor      - feed both updated matrices to the correlation tool
# Published outputs: both updated matrices from step 4 and the file emitted by step 5.
class: Workflow
label: call-deconv-on-sim
id: call-deconv-on-sim
cwlVersion: v1.2

# StepInputExpressionRequirement is needed for the `valueFrom` used in celltype-cor.
requirements:
- class: SubworkflowFeatureRequirement
- class: MultipleInputFeatureRequirement
- class: InlineJavascriptRequirement
- class: StepInputExpressionRequirement

inputs:
signature: ##name of matrix to sample from
type: string
protAlg: ##algorithm to run
type: string
simulation: ## permutation to test
type: string
default: '1'
# NOTE(review): dataType is declared here but no step below references it.
dataType: ##mRNA or protein data
type: string
default: 'prot'
simType: ##data simulated from mrna or protein
type: string
default: 'prot'
sample: ##how much of the permuted sample do we test
type: int
default: 100
sampleRep: ## presumably the sampling replicate index; only forwarded to celltype-cor - TODO confirm
type: int
default: 1

outputs:
# matrix:
# type: File
# outputSource: get-sim-data/matrix
# cellPred exposes the updated cell-type matrix produced by match-prot-to-sig.
cellPred:
type: File
outputSource: match-prot-to-sig/updated-cell-matrix
# deconvoluted:
# type: File
# outputSource: deconv-prot/deconvoluted
# deconv exposes the updated deconvolution matrix produced by match-prot-to-sig.
deconv:
type: File
outputSource: match-prot-to-sig/updated-deconv
# File emitted by the celltype-cor step.
cell-cor-file:
type: File
outputSource: celltype-cor/corr

steps:
# Signature matrix selected by name; `sample` is passed through as `subsample`.
get-sig-mat:
run: ../../signature_matrices/get-signature-matrix.cwl
in:
sigMatrixName: signature
subsample: sample
out:
[sigMatrix]
# Simulated data for the requested permutation and simulation type.
get-sim-data:
run: ../../simulatedData/sim-data-tool.cwl
in:
simNumber: simulation
simType: simType
out:
[matrix,cellType]
# Deconvolution of the simulated matrix using the signature from get-sig-mat.
deconv-prot:
run: ../../tumorDeconvAlgs/run-deconv.cwl
in:
alg: protAlg
signature: get-sig-mat/sigMatrix
matrix: get-sim-data/matrix
out: [deconvoluted]
# Takes the deconvolution result and the simulated cell-type matrix and returns
# an updated copy of each (presumably with harmonised cell-type names - confirm in map-sig-tool.cwl).
match-prot-to-sig:
run: ../../simulatedData/map-sig-tool.cwl
in:
deconv-matrix: deconv-prot/deconvoluted
sig-matrix: get-sig-mat/sigMatrix
deconv-type: simType
cell-matrix: get-sim-data/cellType
out: [updated-deconv,updated-cell-matrix]
# The correlation tool's `proteomics` slot receives the deconvolution result and its
# `transcriptomics` slot receives the simulated cell-type matrix; `mrnaAlg` is
# fixed to the literal "cellFraction" and `cancerType` carries the simulation id.
celltype-cor:
run: ../correlations/deconv-corrXcelltypes-cwl-tool.cwl
in:
cancerType: simulation
mrnaAlg:
valueFrom: "cellFraction"
protAlg: protAlg
signature: signature
sampleVal: sample
sampleType: simType
sampleRep: sampleRep
proteomics:
source: match-prot-to-sig/updated-deconv
transcriptomics:
source: match-prot-to-sig/updated-cell-matrix
out: [corr]
# Disabled step: matrix distance between the two updated matrices.
# matrix-distance:
# run: ../distance/deconv-comparison-tool.cwl
# in:
# matrixA: match-prot-to-sig/updated-deconv
# matrixB: match-prot-to-sig/updated-cell-matrix
# cancerType: simulation
# aAlg: protAlg
# bAlg:
# valueFrom: "cellFraction"
# signature: get-sig-mat/sigMatrix
# sampleType: simType
# out:
# [dist]
16 changes: 16 additions & 0 deletions metrics/data-sim/plotSampling.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
## Plot simulation results with sampling.
##
## Reads every file in the current working directory whose name matches
## 'correlation-', stacks them into one table, and plots `value` against
## `sample`, coloured by `cellType` and faceted by `matrix` (rows) and
## `prot.algorithm` (columns). Writes 'samplingResultsForProt.pdf'.

library(ggplot2)

allfiles <- list.files(".")
corfiles <- grep("correlation-", allfiles, value = TRUE)

## Fail early with a clear message: rbind-ing an empty list yields NULL,
## which would leave the plots below with no data to draw.
if (length(corfiles) == 0) {
  stop(
    "no files matching 'correlation-' found in ", getwd(),
    call. = FALSE
  )
}

## TRUE rather than T: T is an ordinary variable and can be reassigned.
fulltab <- do.call(
  rbind,
  lapply(corfiles, function(x) read.table(x, header = TRUE))
)

res <- ggplot(fulltab, aes(x = sample, y = value, col = cellType)) +
  geom_jitter() +
  facet_grid(matrix ~ prot.algorithm)
ggsave("samplingResultsForProt.pdf", res, width = 12)


## now do the mrna
## NOTE(review): res2 is built but never written to disk, and it facets on
## prot.algorithm just like the protein plot. It also maps cellType to `fill`,
## which the default geom_point() shape does not display - confirm the
## intended column, aesthetic and output file before adding a ggsave() here.
res2 <- ggplot(fulltab, aes(x = sample, y = value, fill = cellType)) +
  geom_point() +
  facet_grid(matrix ~ prot.algorithm)
26 changes: 13 additions & 13 deletions metrics/data-sim/runSamplingManually.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,19 @@

filelist = []
for i in [0,1,2,3,4]:
estring = 'cwltool simul-data-sampling.cwl --prot-sigs LM9 --simType prot --repNumber '+str(i)
print(estring)
os.system(estring)
fname='combined-cellType-correlation-'+str(i)+'.tsv'
os.rename(fname,'prot-LM9-'+fname)
filelist.append('prot-LM9-'+fname)

estring = 'cwltool simul-data-sampling.cwl --prot-sigs LM7c --simType prot --repNumber '+str(i)
print(estring)
os.system(estring)
fname='combined-cellType-correlation-'+str(i)+'.tsv'
os.rename(fname,'prot-LM7c-'+fname)
filelist.append('prot-LM7c-'+fname)
# estring = 'cwltool simul-data-sampling.cwl --prot-sigs LM9 --simType prot --repNumber '+str(i)
# print(estring)
# os.system(estring)
# fname='combined-cellType-correlation-'+str(i)+'.tsv'
# os.rename(fname,'prot-LM9-'+fname)
# filelist.append('prot-LM9-'+fname)

# estring = 'cwltool simul-data-sampling.cwl --prot-sigs LM7c --simType prot --repNumber '+str(i)
# print(estring)
# os.system(estring)
# fname='combined-cellType-correlation-'+str(i)+'.tsv'
# os.rename(fname,'prot-LM7c-'+fname)
# filelist.append('prot-LM7c-'+fname)

estring = 'cwltool simul-data-sampling.cwl --rna-sigs LM22 --simType mrna --repNumber '+str(i)
print(estring)
Expand Down
2 changes: 1 addition & 1 deletion metrics/data-sim/simul-data-sampling.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ inputs:
default: [10,20,40,60,80,100]
mrna-perms:
type: string[]
default: ['1','2','3','4','5']#,'6','7','8','9','10','pbmc']
default: ['1','2','3','4','5','6','7','8','9','10','pbmc']
prot-perms:
type: string[]
default: ['1','2','3','4','5']
Expand Down
5 changes: 3 additions & 2 deletions simulatedData/mapSimDataMatrices.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ main <- function(){
print(argv)
deconv.mat <- read.table(argv[1],header=T,row.names=1,sep='\t', check.names = F) #matrix to be fixed
sig.mat <- argv[2] #signature used to deconvolve matrix
sig.mat<-basename(sig.mat)
sig.mat <-stringr::str_replace(sig.mat,'_[0-9]*','') ##added this due to sampling changes!!!
sim.type <- tolower(argv[3]) #type of simulation performed
cell.type <-read.table(argv[4],header=T,row.names=1,sep='\t', check.names = F)

sig.mat<-basename(sig.mat)

print(paste("mapping",sig.mat,"predictions to",sim.type,'simulations'))
over <- intersect(rownames(deconv.mat),rownames(cell.type))
print(paste0('Sig cell types: ',paste(rownames(deconv.mat),collapse=',')))
print(paste0('Simulated cell types: ',paste(rownames(cell.type),collapse=',')))
Expand All @@ -24,7 +25,7 @@ main <- function(){
##maximize overlap
rownames(deconv.mat)<-rownames(deconv.mat)%>%
stringr::str_replace('Neutrophil$','Neutrophils')%>%
stringr::str_replace(fixed('MO'),'Monocytes')%>%
stringr::str_replace(stringr::fixed('MO'),'Monocytes')%>%
stringr::str_replace('^T8 cells$','CD8 T cells')%>%
stringr::str_replace('^T4 cells$','CD4 T cells')%>%
stringr::str_replace(fixed('B-cells'),'B cells')%>%
Expand Down

0 comments on commit 09a3998

Please sign in to comment.