Skip to content

Commit

Permalink
lib: update GeneDownloadManager to call the script gene_extra_info.pl, #TASK-5575, #TASK-5564
Browse files Browse the repository at this point in the history
  • Loading branch information
jtarraga committed Jul 24, 2024
1 parent 299003b commit 1d171d5
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 10 deletions.
17 changes: 15 additions & 2 deletions cellbase-app/app/scripts/ensembl-scripts/ensembl_canonical.pl
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,26 @@

$query->formatter("TSV");

open (ENSEMBL_CANONICAL, ">$outdir/ensembl_canonical.txt") || die "Cannot open ensembl_canonical.txt file";
# Open the file for writing
open(my $fh, '>', "$outdir/ensembl_canonical.txt") or die "Cannot open ensembl_canonical.txt file: $!";

# Save the original stdout
my $original_stdout = *STDOUT;
open(STDOUT, '>&', $fh) or die "Can't redirect STDOUT: $!";

my $query_runner = BioMart::QueryRunner->new();

# to obtain unique rows only
$query_runner->uniqueRowsOnly(1);
$query_runner->execute($query);
#$query_runner->printHeader();
print ENSEMBL_CANONICAL $query_runner->printResults();
#print ENSEMBL_CANONICAL $query_runner->printResults();
# Call printResults which prints to STDOUT (now redirected to the file)
$query_runner->printResults();
#$query_runner->printFooter();

# Restore the original stdout
open(STDOUT, '>&', $original_stdout) or die "Can't restore STDOUT: $!";

# Close the filehandle
close($fh) or die "Failed to close file: $!";
2 changes: 1 addition & 1 deletion cellbase-app/app/scripts/ensembl-scripts/genome_info.pl
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@

sub print_parameters {
print "Parameters: ";
print "species: $species, outfile: $outfile, ";
print "species: $species, assembly: $assembly, outfile: $outfile, ";
print "ensembl-registry: $ENSEMBL_REGISTRY, ";
print "ensembl-host: $ENSEMBL_HOST, ensembl-port: $ENSEMBL_PORT, ";
print "ensembl-user: $ENSEMBL_USER, verbose: $verbose, help: $help";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ public final class EtlCommons {
public static final String REFSEQ_RNA_FNA_FILE_ID = "RNA_FNA";

// Gene annotation
public static final String ENSEMBL_CANONICAL_DATA = "ensembl_canonical";
public static final String GENE_EXTRA_INFO_DATA = "gene_extra_info";
// - MANE Select
public static final String MANE_SELECT_DATA = "MANE Select";
// Must match the configuration file
Expand Down Expand Up @@ -354,6 +356,8 @@ public final class EtlCommons {
dataNamesMap.put(GENOME_DATA, "Genome");
dataNamesMap.put(GENOME_INFO_DATA, "Genome Info");
dataNamesMap.put(GENE_DATA, "Gene");
dataNamesMap.put(ENSEMBL_CANONICAL_DATA, "Ensembl canonical");
dataNamesMap.put(GENE_EXTRA_INFO_DATA, "Gene extra info");
dataNamesMap.put(GENE_ANNOTATION_DATA, "Gene Annotation");
dataNamesMap.put(MANE_SELECT_DATA, "MANE Select");
dataNamesMap.put(LRG_DATA, "LRG");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ public List<DownloadFile> download() throws IOException, InterruptedException, C
downloadFiles.addAll(downloadEnsemblData(ensemblDownloadPath));

// Ensembl canonical
downloadEnsemblCanonical();
downloadEnsemblCanonical(geneDownloadPath);

// Gene extra info
downloadGeneExtraInfo(geneDownloadPath);

// RefSeq
downloadFiles.addAll(downloadRefSeq(refSeqDownloadPath));
Expand Down Expand Up @@ -151,20 +154,43 @@ private List<DownloadFile> downloadRefSeq(Path refSeqDownloadPath) throws IOExce
return downloadFiles;
}

public void downloadEnsemblCanonical() throws IOException, CellBaseException {
logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(GENOME_INFO_DATA));
Path sequenceFolder = downloadFolder.resolve(GENOME_DATA);
Files.createDirectories(sequenceFolder);
public void downloadEnsemblCanonical(Path geneDownloadPath) throws IOException, CellBaseException {
logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(ENSEMBL_CANONICAL_DATA));

String dockerImage = "opencb/cellbase-builder:" + GitRepositoryState.get().getBuildVersion();
try {
// Build command line to run Perl script via docker image
// Output binding
AbstractMap.SimpleEntry<String, String> outputBinding = new AbstractMap.SimpleEntry<>(
sequenceFolder.toAbsolutePath().toString(), "/tmp");
geneDownloadPath.toAbsolutePath().toString(), "/tmp");

// Params
String params = "/opt/cellbase/scripts/ensembl-scripts/ensembl_canonical.pl"
+ " --species \"" + speciesConfiguration.getId() + "\""
+ " --assembly \"" + assemblyConfiguration.getName() + "\""
+ " --outdir \"" + outputBinding.getValue() + "\"";

// Execute perl script in docker
DockerUtils.run(dockerImage, null, outputBinding, params, null);
} catch (Exception e) {
throw new CellBaseException("Error executing Perl script from Docker " + dockerImage, e);
}

logger.info(DOWNLOADING_DONE_LOG_MESSAGE, getDataName(ENSEMBL_CANONICAL_DATA));
}

public void downloadGeneExtraInfo(Path geneDownloadPath) throws IOException, CellBaseException {
logger.info(DOWNLOADING_LOG_MESSAGE, getDataName(GENE_EXTRA_INFO_DATA));

String dockerImage = "opencb/cellbase-builder:" + GitRepositoryState.get().getBuildVersion();
try {
// Build command line to run Perl script via docker image
// Output binding
AbstractMap.SimpleEntry<String, String> outputBinding = new AbstractMap.SimpleEntry<>(
geneDownloadPath.toAbsolutePath().toString(), "/tmp");

// Params
String params = "/opt/cellbase/scripts/ensembl-scripts/gene_extra_info.pl"
+ " --species \"" + speciesConfiguration.getId() + "\""
+ " --outdir \"" + outputBinding.getValue() + "\"";

Expand All @@ -174,7 +200,7 @@ public void downloadEnsemblCanonical() throws IOException, CellBaseException {
throw new CellBaseException("Error executing Perl script from Docker " + dockerImage, e);
}

logger.info(DOWNLOADING_DONE_LOG_MESSAGE, getDataName(GENOME_INFO_DATA));
logger.info(DOWNLOADING_DONE_LOG_MESSAGE, getDataName(GENE_EXTRA_INFO_DATA));
}

private DownloadFile downloadMane(Path geneDownloadPath) throws IOException, InterruptedException, CellBaseException {
Expand Down

0 comments on commit 1d171d5

Please sign in to comment.