Skip to content

Commit

Permalink
lib: improve repeats downloaders by checking if data is already downloaded, #TASK-5575, #TASK-5576
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Jul 24, 2024
1 parent d10931d commit b422f3a
Showing 1 changed file with 29 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -46,47 +46,69 @@ public List<DownloadFile> downloadRepeats() throws IOException, InterruptedExcep

// Check if species is supported
if (SpeciesUtils.hasData(configuration, speciesConfiguration.getScientificName(), REPEATS_DATA)) {
logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(REPEATS_DATA));

Path repeatsFolder = downloadFolder.resolve(REPEATS_DATA);
Files.createDirectories(repeatsFolder);
Path trfFolder = Files.createDirectories(repeatsFolder.resolve(TRF_DATA));
Path wmFolder = Files.createDirectories(repeatsFolder.resolve(WM_DATA));
Path gsdFolder = Files.createDirectories(repeatsFolder.resolve(GSD_DATA));

String prefixId = getConfigurationFileIdPrefix(speciesConfiguration.getScientificName());

// Already downloaded ?
boolean downloadTrf = !isAlreadyDownloaded(trfFolder.resolve(getDataVersionFilename(TRF_DATA)), getDataName(TRF_DATA))
&& configuration.getDownload().getSimpleRepeats().getFiles().containsKey(prefixId + SIMPLE_REPEATS_FILE_ID);
boolean downloadWm = !isAlreadyDownloaded(wmFolder.resolve(getDataVersionFilename(WM_DATA)), getDataName(WM_DATA))
&& configuration.getDownload().getWindowMasker().getFiles().containsKey(prefixId + WINDOW_MASKER_FILE_ID);
boolean downloadGsd = !isAlreadyDownloaded(gsdFolder.resolve(getDataVersionFilename(GSD_DATA)), getDataName(GSD_DATA))
&& configuration.getDownload().getGenomicSuperDups().getFiles().containsKey(prefixId + GENOMIC_SUPER_DUPS_FILE_ID);

if (!downloadTrf && !downloadWm && !downloadGsd) {
return new ArrayList<>();
}

logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(REPEATS_DATA));

// Download tandem repeat finder
if (configuration.getDownload().getSimpleRepeats().getFiles().containsKey(prefixId + SIMPLE_REPEATS_FILE_ID)) {
if (downloadTrf) {
logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(TRF_DATA));
String url = configuration.getDownload().getSimpleRepeats().getHost()
+ configuration.getDownload().getSimpleRepeats().getFiles().get(prefixId + SIMPLE_REPEATS_FILE_ID);
Path outputPath = repeatsFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_FROM_TO_LOG_MESSAGE, url, outputPath);
downloadFiles.add(downloadFile(url, outputPath.toString()));
logger.info(OK_LOG_MESSAGE);

saveDataSource(TRF_DATA, configuration.getDownload().getSimpleRepeats().getVersion(), getTimeStamp(),
Collections.singletonList(url), repeatsFolder.resolve(getDataVersionFilename(TRF_DATA)));
Collections.singletonList(url), trfFolder.resolve(getDataVersionFilename(TRF_DATA)));
}

// Download WindowMasker
if (configuration.getDownload().getWindowMasker().getFiles().containsKey(prefixId + WINDOW_MASKER_FILE_ID)) {
if (downloadWm) {
logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(WM_DATA));
String url = configuration.getDownload().getWindowMasker().getHost()
+ configuration.getDownload().getWindowMasker().getFiles().get(prefixId + WINDOW_MASKER_FILE_ID);
Path outputPath = repeatsFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_FROM_TO_LOG_MESSAGE, url, outputPath);
downloadFiles.add(downloadFile(url, outputPath.toString()));
logger.info(OK_LOG_MESSAGE);

saveDataSource(WM_DATA, configuration.getDownload().getWindowMasker().getVersion(), getTimeStamp(),
Collections.singletonList(url), repeatsFolder.resolve(getDataVersionFilename(WM_DATA)));
Collections.singletonList(url), wmFolder.resolve(getDataVersionFilename(WM_DATA)));
}

// Download genomic super duplications
if (configuration.getDownload().getGenomicSuperDups().getFiles().containsKey(prefixId + GENOMIC_SUPER_DUPS_FILE_ID)) {
if (downloadGsd) {
logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(GSD_DATA));
String url = configuration.getDownload().getGenomicSuperDups().getHost()
+ configuration.getDownload().getGenomicSuperDups().getFiles().get(prefixId + GENOMIC_SUPER_DUPS_FILE_ID);
Path outputPath = repeatsFolder.resolve(getFilenameFromUrl(url));
logger.info(DOWNLOADING_FROM_TO_LOG_MESSAGE, url, outputPath);
downloadFiles.add(downloadFile(url, outputPath.toString()));
logger.info(OK_LOG_MESSAGE);

saveDataSource(GSD_DATA, configuration.getDownload().getGenomicSuperDups().getVersion(), getTimeStamp(),
Collections.singletonList(url), repeatsFolder.resolve(getDataVersionFilename(GSD_DATA)));
Collections.singletonList(url), gsdFolder.resolve(getDataVersionFilename(GSD_DATA)));
}

logger.info(DOWNLOADING_DONE_LOG_MESSAGE, getDataName(REPEATS_DATA));
Expand Down

0 comments on commit b422f3a

Please sign in to comment.