Skip to content

Commit

Permalink
downloader: add cancer hotspot
Browse files Browse the repository at this point in the history
  • Loading branch information
imedina committed Jan 2, 2024
1 parent 89831a6 commit ce1767a
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public class DownloadProperties {
private EnsemblProperties ensembl;
private EnsemblProperties ensemblGenomes;
private URLProperties hgnc;
private URLProperties cancerHotspot;
private URLProperties refSeq;
private URLProperties refSeqFasta;
private URLProperties refSeqProteinFasta;
Expand Down Expand Up @@ -517,6 +518,15 @@ public DownloadProperties setHgnc(URLProperties hgnc) {
return this;
}

/**
 * Returns the URL properties held for the Cancer Hotspot download source.
 *
 * @return the current {@code cancerHotspot} value; may be {@code null} if it was never set
 */
public URLProperties getCancerHotspot() {
    return this.cancerHotspot;
}

/**
 * Sets the URL properties for the Cancer Hotspot download source.
 *
 * @param cancerHotspot the value to store in the {@code cancerHotspot} field
 * @return this instance, so that setter calls can be chained
 */
public DownloadProperties setCancerHotspot(URLProperties cancerHotspot) {
    this.cancerHotspot = cancerHotspot;

    return this;
}

public static class EnsemblProperties {

private DatabaseCredentials database;
Expand Down
4 changes: 4 additions & 0 deletions cellbase-core/src/main/resources/configuration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ download:
hgnc:
host: https://ftp.ebi.ac.uk/pub/databases/genenames/hgnc/archive/monthly/tsv/hgnc_complete_set_2023-11-01.txt
version: 2023-11-01
cancerHotspot:
host: https://www.cancerhotspots.org/files/hotspots_v2.xls
version: "v2"
refSeq:
host: https://ftp.ncbi.nih.gov/refseq/H_sapiens/annotation/GRCh38_latest/refseq_identifiers/GRCh38_latest_genomic.gtf.gz
refSeqFasta:
Expand Down Expand Up @@ -161,6 +164,7 @@ download:
host: https://old.dgidb.org/data/monthly_tsvs/2022-Feb/interactions.tsv
version: "2022-02-01"
cadd:
## Nacho: Move to https://krishna.gs.washington.edu/download/CADD/v1.7-pre/GRCh38/whole_genome_SNVs.tsv.gz ASAP!
host: https://krishna.gs.washington.edu/download/CADD/v1.6/GRCh38/whole_genome_SNVs.tsv.gz
reactome:
host: http://www.reactome.org/download/current/biopax.zip
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ public List<DownloadFile> download() throws IOException, InterruptedException {
downloadFiles.add(downloadMane(geneFolder));
downloadFiles.add(downloadLrg(geneFolder));
downloadFiles.add(downloadHgnc(geneFolder));
downloadFiles.add(downloadCancerHotspot(geneFolder));
downloadFiles.add(downloadDrugData(geneFolder));
downloadFiles.addAll(downloadGeneUniprotXref(geneFolder));
downloadFiles.add(downloadGeneExpressionAtlas(geneFolder));
Expand Down Expand Up @@ -211,7 +212,7 @@ private DownloadFile downloadLrg(Path geneFolder) throws IOException, Interrupte

private DownloadFile downloadHgnc(Path geneFolder) throws IOException, InterruptedException {
if (speciesConfiguration.getScientificName().equals("Homo sapiens")) {
logger.info("Downloading LRG ...");
logger.info("Downloading HGNC ...");
String url = configuration.getDownload().getHgnc().getHost();
saveVersionData(EtlCommons.GENE_DATA, "HGNC_GENE", configuration.getDownload().getHgnc().getVersion(),
getTimeStamp(), Collections.singletonList(url), geneFolder.resolve("hgncVersion.json"));
Expand All @@ -221,6 +222,18 @@ private DownloadFile downloadHgnc(Path geneFolder) throws IOException, Interrupt
return null;
}

/**
 * Downloads the Cancer Hotspot file into the given gene folder and records its version metadata.
 *
 * <p>The download only happens when the configured species' scientific name is "Homo sapiens".
 * In that case the version data is written to {@code cancerHotspotVersion.json} inside
 * {@code geneFolder}, and the file is saved in {@code geneFolder} under the last path segment
 * of the configured URL.
 *
 * @param geneFolder folder where both the version file and the downloaded file are written
 * @return the result of {@code downloadFile} for the Cancer Hotspot URL, or {@code null} when the
 *         species is not "Homo sapiens"
 * @throws IOException declared by this method; presumably raised by the version-file write or the
 *         download helper
 * @throws InterruptedException declared by this method; presumably raised by the download helper
 */
private DownloadFile downloadCancerHotspot(Path geneFolder) throws IOException, InterruptedException {
    if (speciesConfiguration.getScientificName().equals("Homo sapiens")) {
        logger.info("Downloading Cancer Hotspot ...");
        String url = configuration.getDownload().getCancerHotspot().getHost();
        // Record the Cancer Hotspot version itself; reading the HGNC version here would store
        // the wrong value in cancerHotspotVersion.json.
        String version = configuration.getDownload().getCancerHotspot().getVersion();
        saveVersionData(EtlCommons.GENE_DATA, "CANCER_HOTSPOT", version, getTimeStamp(),
                Collections.singletonList(url), geneFolder.resolve("cancerHotspotVersion.json"));
        // The local file name is the last path segment of the URL.
        String[] array = url.split("/");
        return downloadFile(url, geneFolder.resolve(array[array.length - 1]).toString());
    }
    return null;
}

private DownloadFile downloadGO(Path geneFolder) throws IOException, InterruptedException {
if (speciesConfiguration.getScientificName().equals("Homo sapiens")) {
logger.info("Downloading go annotation...");
Expand Down

0 comments on commit ce1767a

Please sign in to comment.