Skip to content

Commit

Permalink
Merge pull request #225 from PNNL-CompBio/dev
Browse files Browse the repository at this point in the history
removed dependency on cptac package at runtime
  • Loading branch information
sgosline authored Sep 20, 2023
2 parents 6fd97ef + 0c59640 commit 560577e
Show file tree
Hide file tree
Showing 5 changed files with 21 additions and 87 deletions.
7 changes: 6 additions & 1 deletion mRNAData/getAllDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
'''

import cptac

import os
# Create the output directory that the per-dataset CSV files are written to.
# exist_ok=True avoids a FileExistsError when /data/ is already present
# (e.g. the script is re-run, or the directory was pre-created or mounted).
os.makedirs('/data/', exist_ok=True)

def getCancerObj(cancertype):
# cptac.download(dataset=cancertype,source='harmonized',)
Expand Down Expand Up @@ -47,4 +48,8 @@ def getCancerObj(cancertype):
dat.get_clinical(cs)
tsource = dat_list['transcriptomics']
res = dat.get_transcriptomics(tsource[0])
if res.columns.nlevels == 2:
res.columns = res.columns.droplevel(1)

print(ds+':',res.shape)
res.to_csv('/data/'+ds+'.csv')
39 changes: 3 additions & 36 deletions mRNAData/mRNADataSetsCLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
Basic CLI to import CPTAC transcriptomic (mRNA) data
'''
import argparse
import cptac
#import cptac
import pandas


def main():
Expand All @@ -15,42 +16,8 @@ def main():
to be collected')
opts = parser.parse_args()

if opts.type.lower() == 'brca':
dat = cptac.Brca()
elif opts.type.lower() == 'ccrcc':
dat = cptac.Ccrcc()
elif opts.type.lower() == 'coad':
dat = cptac.Coad()
elif opts.type.lower() == 'ucec':
dat = cptac.Ucec()
elif opts.type.lower() == 'gbm':
dat = cptac.Gbm()
elif opts.type.lower() == 'hnscc':
dat = cptac.Hnscc()
elif opts.type.lower() == 'lscc':
dat = cptac.Lscc()
elif opts.type.lower() == 'luad':
dat = cptac.Luad()
elif opts.type.lower() == 'ovarian':
dat = cptac.Ov()
elif opts.type.lower() == 'pdac':
dat = cptac.Pdac()
else:
exit()
#this call changed in recent version
dat_list = dat.list_data_sources().set_index('Data type').to_dict()['Available sources']
clinsource = dat_list['clinical']
if 'harmonized' in clinsource:
cs = 'harmonized'
else:
cs = clinsource[0]
dat.get_clinical(cs)
tsource = dat_list['transcriptomics']
df = dat.get_transcriptomics(tsource[0])
df=pandas.read_csv("/data/"+opts.type+'.csv',index_col=0)

if df.columns.nlevels == 2:
df.columns = df.columns.droplevel(1)

# Get the sample type specific dataframe
# if opts.sample.lower() != 'all':
# meta = dat.get_clinical()
Expand Down
11 changes: 1 addition & 10 deletions metrics/mrna-prot/mrna-prot-comparison.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -37,21 +37,12 @@ outputs:
prot-files:
type: File[]
outputSource: run-all-algs-by-sig/prot-file
# dist-files:
# type: File[]
# outputSource: run-all-algs-by-sig/mat-dist-file
# dist-fig:
# type: File[]
# outputSource: get-distances/fig
# dist-tab:
# type: File
# outputSource: get-distances/table


steps:
run-all-algs-by-sig:
run: call-deconv-and-cor.cwl
scatter: [signature,mrna-alg,prot-alg,cancerType,tissueType]
scatter: [signature,mrna-alg,prot-alg,tissueType,cancerType]
scatterMethod: flat_crossproduct
in:
signature: signatures
Expand Down
11 changes: 8 additions & 3 deletions protData/getAllDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
'''

import cptac

import os
# Create the output directory that the per-dataset CSV files are written to.
# exist_ok=True avoids a FileExistsError when /data/ is already present
# (e.g. the script is re-run, or the directory was pre-created or mounted).
os.makedirs('/data/', exist_ok=True)

def getCancerObj(cancertype):
# cptac.download(dataset=cancertype,source='harmonized',)
Expand Down Expand Up @@ -46,5 +47,9 @@ def getCancerObj(cancertype):
cs = clinsource[0]
dat.get_clinical(cs)
tsource = dat_list['proteomics']
df = dat.get_proteomics(tsource[0])
print(ds+':',df.shape)
res = dat.get_proteomics(tsource[0])
if res.columns.nlevels == 2:
res.columns = res.columns.droplevel(1)

print(ds+':',res.shape)
res.to_csv('/data/'+ds+'.csv')
40 changes: 3 additions & 37 deletions protData/protDataSetsCLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
Basic CLI to import CPTAC proteomic data
'''
import argparse
import cptac

import pandas

def main():
parser = argparse.ArgumentParser()
Expand All @@ -14,42 +13,9 @@ def main():
help='Sample type, tumor vs normal vs all (default), \
to be collected')
opts = parser.parse_args()
df=pandas.read_csv("/data/"+opts.type+'.csv',index_col=0)
# df = df.reset_index()

if opts.type.lower() == 'brca':
dat = cptac.Brca()
elif opts.type.lower() == 'ccrcc':
dat = cptac.Ccrcc()
elif opts.type.lower() == 'coad':
dat = cptac.Coad()
elif opts.type.lower() == 'ucec':
dat = cptac.Ucec()
elif opts.type.lower() == 'gbm':
dat = cptac.Gbm()
elif opts.type.lower() == 'hnscc':
dat = cptac.Hnscc()
elif opts.type.lower() == 'lscc':
dat = cptac.Lscc()
elif opts.type.lower() == 'luad':
dat = cptac.Luad()
elif opts.type.lower() == 'ovarian':
dat = cptac.Ov()
elif opts.type.lower() == 'pdac':
dat = cptac.Pdac()
else:
exit()
#this call changed in recent version
dat_list = dat.list_data_sources().set_index('Data type').to_dict()['Available sources']
clinsource = dat_list['clinical']
if 'harmonized' in clinsource:
cs = 'harmonized'
else:
cs = clinsource[0]
dat.get_clinical(cs)
tsource = dat_list['proteomics']
df = dat.get_proteomics(tsource[0])

if df.columns.nlevels == 2:
df.columns = df.columns.droplevel(1)

# Get the sample type specific dataframe
# if opts.sample.lower() != 'all':
Expand Down

0 comments on commit 560577e

Please sign in to comment.