diff --git a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RepeatsDownloadManager.java b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RepeatsDownloadManager.java index 012289383..77a8f160f 100644 --- a/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RepeatsDownloadManager.java +++ b/cellbase-lib/src/main/java/org/opencb/cellbase/lib/download/RepeatsDownloadManager.java @@ -46,47 +46,69 @@ public List downloadRepeats() throws IOException, InterruptedExcep // Check if species is supported if (SpeciesUtils.hasData(configuration, speciesConfiguration.getScientificName(), REPEATS_DATA)) { - logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(REPEATS_DATA)); Path repeatsFolder = downloadFolder.resolve(REPEATS_DATA); Files.createDirectories(repeatsFolder); + Path trfFolder = Files.createDirectories(repeatsFolder.resolve(TRF_DATA)); + Path wmFolder = Files.createDirectories(repeatsFolder.resolve(WM_DATA)); + Path gsdFolder = Files.createDirectories(repeatsFolder.resolve(GSD_DATA)); String prefixId = getConfigurationFileIdPrefix(speciesConfiguration.getScientificName()); + // Already downloaded ? + boolean downloadTrf = !isAlreadyDownloaded(trfFolder.resolve(getDataVersionFilename(TRF_DATA)), getDataName(TRF_DATA)) + && configuration.getDownload().getSimpleRepeats().getFiles().containsKey(prefixId + SIMPLE_REPEATS_FILE_ID); + boolean downloadWm = !isAlreadyDownloaded(wmFolder.resolve(getDataVersionFilename(WM_DATA)), getDataName(WM_DATA)) + && configuration.getDownload().getWindowMasker().getFiles().containsKey(prefixId + WINDOW_MASKER_FILE_ID); + boolean downloadGsd = !isAlreadyDownloaded(gsdFolder.resolve(getDataVersionFilename(GSD_DATA)), getDataName(GSD_DATA)) + && configuration.getDownload().getGenomicSuperDups().getFiles().containsKey(prefixId + GENOMIC_SUPER_DUPS_FILE_ID); + + if (!downloadTrf && !downloadWm && !downloadGsd) { + return new ArrayList<>(); + } + + logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(REPEATS_DATA)); + // Download tandem repeat finder - if (configuration.getDownload().getSimpleRepeats().getFiles().containsKey(prefixId + SIMPLE_REPEATS_FILE_ID)) { + if (downloadTrf) { + logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(TRF_DATA)); String url = configuration.getDownload().getSimpleRepeats().getHost() + configuration.getDownload().getSimpleRepeats().getFiles().get(prefixId + SIMPLE_REPEATS_FILE_ID); Path outputPath = repeatsFolder.resolve(getFilenameFromUrl(url)); logger.info(DOWNLOADING_FROM_TO_LOG_MESSAGE, url, outputPath); downloadFiles.add(downloadFile(url, outputPath.toString())); logger.info(OK_LOG_MESSAGE); + saveDataSource(TRF_DATA, configuration.getDownload().getSimpleRepeats().getVersion(), getTimeStamp(), - Collections.singletonList(url), repeatsFolder.resolve(getDataVersionFilename(TRF_DATA))); + Collections.singletonList(url), trfFolder.resolve(getDataVersionFilename(TRF_DATA))); } // Download WindowMasker - if (configuration.getDownload().getWindowMasker().getFiles().containsKey(prefixId + WINDOW_MASKER_FILE_ID)) { + if (downloadWm) { + logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(WM_DATA)); String url = configuration.getDownload().getWindowMasker().getHost() + configuration.getDownload().getWindowMasker().getFiles().get(prefixId + WINDOW_MASKER_FILE_ID); Path outputPath = repeatsFolder.resolve(getFilenameFromUrl(url)); logger.info(DOWNLOADING_FROM_TO_LOG_MESSAGE, url, outputPath); downloadFiles.add(downloadFile(url, outputPath.toString())); logger.info(OK_LOG_MESSAGE); + saveDataSource(WM_DATA, configuration.getDownload().getWindowMasker().getVersion(), getTimeStamp(), - Collections.singletonList(url), repeatsFolder.resolve(getDataVersionFilename(WM_DATA))); + Collections.singletonList(url), wmFolder.resolve(getDataVersionFilename(WM_DATA))); } // Download genomic super duplications - if (configuration.getDownload().getGenomicSuperDups().getFiles().containsKey(prefixId + GENOMIC_SUPER_DUPS_FILE_ID)) { + if (downloadGsd) { + logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(GSD_DATA)); String url = configuration.getDownload().getGenomicSuperDups().getHost() + configuration.getDownload().getGenomicSuperDups().getFiles().get(prefixId + GENOMIC_SUPER_DUPS_FILE_ID); Path outputPath = repeatsFolder.resolve(getFilenameFromUrl(url)); logger.info(DOWNLOADING_FROM_TO_LOG_MESSAGE, url, outputPath); downloadFiles.add(downloadFile(url, outputPath.toString())); logger.info(OK_LOG_MESSAGE); + saveDataSource(GSD_DATA, configuration.getDownload().getGenomicSuperDups().getVersion(), getTimeStamp(), - Collections.singletonList(url), repeatsFolder.resolve(getDataVersionFilename(GSD_DATA))); + Collections.singletonList(url), gsdFolder.resolve(getDataVersionFilename(GSD_DATA))); } logger.info(DOWNLOADING_DONE_LOG_MESSAGE, getDataName(REPEATS_DATA));