diff --git a/codes3d/codes3d.py b/codes3d/codes3d.py index 7db58a5..50a2bad 100755 --- a/codes3d/codes3d.py +++ b/codes3d/codes3d.py @@ -438,7 +438,7 @@ def map_non_spatial_eqtls( 'pval', 'b', 'b_se', 'snp_chrom', 'snp_locus', 'maf', 'gene_chrom', 'gene_start', 'gene_end'] - eqtl_df = eqtl_df[cols].drop_duplicates() + eqtl_df = eqtl_df[cols].dropna().drop_duplicates() eqtl_project = tissues['project'].iloc[0] if not args.no_afc: afc_start_time = time.time() @@ -446,6 +446,9 @@ def map_non_spatial_eqtls( eqtl_df['sid_chr'] = eqtl_df['snp_chrom'] eqtl_df['sid_pos'] = eqtl_df['snp_locus'] eqtl_df['pid'] = eqtl_df['gencode_id'] + fp = os.path.join(args.output_dir, 'eqtls.txt') + eqtl_df.to_csv(fp, sep='\t', index=False) + print(eqtl_df) eqtl_df = calc_afc( eqtl_df, genotypes_fp, diff --git a/data_preparation/digest_genome.py b/data_preparation/digest_genome.py index e8158e8..dbeb324 100755 --- a/data_preparation/digest_genome.py +++ b/data_preparation/digest_genome.py @@ -106,9 +106,9 @@ def build_fragment_index(fragment_fp, output_db): if __name__ == '__main__': parser = argparse.ArgumentParser( description=( - 'Digest a genome with a restriction enzyme. ', - 'Creates a BED file where the fourth column denotes the restriction', - ' fragment number of the fragment specified')) + """Digest a genome with a restriction enzyme, and create + a BED file where the fourth column denotes the restriction + fragment number of the fragment specified""")) parser.add_argument( "-g", "--genome", help="Genome in genbank or fasta format.") @@ -117,21 +117,20 @@ def build_fragment_index(fragment_fp, output_db): help="Is the genome linear? (default: False)") parser.add_argument( "-e", "--enzyme", nargs='+', - help=("The restriction enzyme with which to fragment the genome, ", - "for example, `-e MspI`")) + help="""The restriction enzyme with which to digest the fragment + for example, `-e MspI`""") parser.add_argument( "-o", "--output_dir", - help=("Output directory for fragment BED and database files. ", - "(default: the directory of input genome.)")) + help="""Output directory for fragment BED and database + (default: the directory of input genome.""") parser.add_argument( "-b", "--do_not_index", action="store_true", - help=("Suppress building of fragment index from resultant ", - "fragmented genome.")) + help="""Suppress building of fragment index from the fragmented genome.""") parser.add_argument( "-d", "--output_db", - help=("Output file path for fragment BED file and database ", - "(default: the name of the input file, with the extension ", - "\".db\"/\".bed\", respectively.)")) + help="""Output file path for fragment BED file and database + (default: the name of the input file, with the extension, + "\".db\"/\".bed\", respectively.)""") parser.add_argument( "-z", "--list_enzymes", help="List the available enzymes for digestion.", action="store_true") diff --git a/data_preparation/init_eqtl_meta.py b/data_preparation/init_eqtl_meta.py index d6bd5c8..05c6724 100755 --- a/data_preparation/init_eqtl_meta.py +++ b/data_preparation/init_eqtl_meta.py @@ -9,8 +9,8 @@ Create PostgreSQL table of list of eQTL projects. Requires a meta_eqtls.txt """ -eqtl_fp = os.path.join(os.path.dirname(__file__), '../lib/meta_eqtls.txt') +eqtl_fp = os.path.join(os.path.dirname(__file__), '../lib/meta_info/meta_eqtls.txt') eqtls = pd.read_csv(eqtl_fp, sep='\t') db = create_engine( - 'postgres://codes3d:codes3d@127.0.0.1/codes3d_commons', echo=False) + 'postgresql://codes3d:codes3d@127.0.0.1/codes3d_commons', echo=False) eqtls.to_sql('meta_eqtls', con=db, if_exists='replace', index=False) diff --git a/data_preparation/init_gene_variant_db.py b/data_preparation/init_gene_variant_db.py index fb82b10..54e1121 100755 --- a/data_preparation/init_gene_variant_db.py +++ b/data_preparation/init_gene_variant_db.py @@ -82,20 +82,20 @@ def build_gene_table( parser = argparse.ArgumentParser( description='Build database tables gene and variant fragments') parser.add_argument( - '-v', '--variants', + "-v", "--variants", help='The filepath to the variant lookup file') parser.add_argument( - '-g', '--genes', - help=('The filepath to a sorted gene reference file containing ', - 'id, chrom, start, end, name, and gencode_id columns.', - 'Note that the id indicates the sorting (chrom, start) ', - 'order of the file starting at 1.')) + "-g", "--genes", + help="""The filepath to a sorted gene reference file containing + id, chrom, start, end, name, and gencode_id columns. + Note that the id indicates the sorting (chrom, start) + order of the file starting at 1.""") parser.add_argument( "-t", "--table", - help='Name of table e.g. variant_lookup_mboi.') + help="Name of table e.g. variant_lookup_mboi.") parser.add_argument( "-u", "--db-url", required=True, - help='URL of database e.g posgresql://user:password@hostname/database') + help="URL of database e.g posgresql://user:password@hostname/database") parser.add_argument( "-c", "--config", default=os.path.join(os.path.dirname(__file__), diff --git a/data_preparation/init_hic_meta.py b/data_preparation/init_hic_meta.py index 97eefdc..9209dac 100755 --- a/data_preparation/init_hic_meta.py +++ b/data_preparation/init_hic_meta.py @@ -29,5 +29,5 @@ ).drop_duplicates() db = create_engine( - 'postgres://codes3d:codes3d@127.0.0.1/codes3d_commons', echo=False) + 'postgresql://codes3d:codes3d@127.0.0.1/codes3d_commons', echo=False) libraries.drop_duplicates().to_sql('meta_hic', con=db, if_exists='replace') diff --git a/docs/.env.codes3d b/docs/.env.codes3d deleted file mode 100644 index 09df252..0000000 --- a/docs/.env.codes3d +++ /dev/null @@ -1,2 +0,0 @@ -# export PYTHONPATH="$PYTHONPATH:`dirname $0`/../codes3d" -PS1="$PS1"'CoDeS3D> ' \ No newline at end of file diff --git a/environment.yaml b/environment.yaml index 0ec60d0..71ab651 100755 --- a/environment.yaml +++ b/environment.yaml @@ -9,12 +9,12 @@ dependencies: - numpy=1.19.1 - pandas=1.0.5 - pandas-plink #=2.0.4 - - psycopg2 #=2.8.5 + - psycopg2 =2.8.5 - pybedtools #=0.8.1 - requests #=2.24.0 - scikits-bootstrap #=1.0.1 - - SQLAlchemy #=1.3.18 - - SQLAlchemy-Utils #=0.36.8 + - SQLAlchemy =1.3.18 + - SQLAlchemy-Utils =0.36.8 - statsmodels #=0.11.1 - pytorch #=1.9.0 - tqdm #=4.48.0