Skip to content

Commit

Permalink
Merge pull request #2414 from opencb/TASK-5827
Browse files Browse the repository at this point in the history
TASK-5827 - Port Patch 1.10.3 -> 2.0.0
  • Loading branch information
j-coll authored Mar 19, 2024
2 parents 7f52ceb + a39dba2 commit 76b6794
Show file tree
Hide file tree
Showing 120 changed files with 966 additions and 631 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
uses: opencb/java-common-libs/.github/workflows/deploy-docker-hub-workflow.yml@develop
needs: build
with:
cli: python3 ./build/cloud/docker/docker-build.py push --images base,init
cli: python3 ./build/cloud/docker/docker-build.py push --images base,init --tag ${{ needs.build.outputs.version }}
secrets: inherit

deploy-python:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
import org.opencb.opencga.core.models.job.Job;
import org.opencb.opencga.core.response.OpenCGAResult;

import java.io.*;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Path;
import java.util.*;

Expand Down Expand Up @@ -46,6 +48,26 @@ public static File getBamFileBySampleId(String sampleId, String studyId, FileMan
return (fileQueryResult.getNumResults() == 0) ? null : fileQueryResult.first();
}

/**
 * Searches the catalog for the BIGWIG file associated with a sample.
 *
 * @param sampleId    ID of the sample the BIGWIG file must be associated with
 * @param studyId     study in which the search is performed
 * @param fileManager catalog file manager used to run the search
 * @param token       token passed to the catalog search
 * @return the matching BIGWIG file, or {@code null} when the search returns no results
 * @throws ToolException if the catalog search fails, or if more than one BIGWIG file matches the sample
 */
public static File getBwFileBySampleId(String sampleId, String studyId, FileManager fileManager, String token) throws ToolException {
    // Restrict the search to files in BIGWIG format that reference the given sample
    Query bigWigQuery = new Query(FileDBAdaptor.QueryParams.FORMAT.key(), File.Format.BIGWIG);
    bigWigQuery.append(FileDBAdaptor.QueryParams.SAMPLE_IDS.key(), sampleId);

    OpenCGAResult<File> bigWigFiles;
    try {
        bigWigFiles = fileManager.search(studyId, bigWigQuery, QueryOptions.empty(), token);
    } catch (CatalogException e) {
        throw new ToolException(e);
    }

    // Sanity check: at most one BIGWIG file is accepted per sample
    int numFound = bigWigFiles.getNumResults();
    if (numFound > 1) {
        throw new ToolException("Found more than one BIGWIG files (" + numFound + ") for sample " + sampleId);
    }
    if (numFound == 0) {
        return null;
    }
    return bigWigFiles.first();
}

public static File getBamFile(String filename, String sampleId, String studyId, FileManager fileManager, String token) throws ToolException {
// Look for the bam file for each sample
OpenCGAResult<File> fileQueryResult;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright 2015-2020 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.opencb.opencga.analysis.alignment;

import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.catalog.managers.CatalogManager;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.file.*;
import org.opencb.opencga.core.response.OpenCGAResult;

import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;

/**
 * Static helpers shared by the alignment analyses to register generated files in the catalog.
 */
public class AlignmentAnalysisUtils {

    /**
     * Links a file generated from a BAM file into the catalog and associates it with that BAM file.
     * <p>
     * The file at {@code outPath} is linked under the catalog path {@code <jobId>/<file name of outPath>}. The linked
     * file is then updated with the sample IDs of {@code bamCatalogFile} and a related-file entry pointing to
     * {@code bamCatalogFile} with relation {@code ALIGNMENT}. If that update fails, the freshly linked file is
     * unlinked again (exactly once) before the failure is reported; a failure of the unlink itself is attached to the
     * reported exception as a suppressed exception instead of replacing it.
     *
     * @param bamCatalogFile catalog entry of the BAM file the output was computed from
     * @param outPath        path of the generated file to link
     * @param jobId          job ID, used as the parent folder of the linked file in the catalog
     * @param study          study in which the file is linked
     * @param catalogManager catalog manager providing the file manager
     * @param token          token passed to the catalog calls
     * @return the catalog entry returned by the link operation
     * @throws CatalogException if linking fails, or if the update call fails
     * @throws ToolException    if the link does not return exactly one file, or the update does not modify exactly one file
     */
    public static File linkAndUpdate(File bamCatalogFile, Path outPath, String jobId, String study, CatalogManager catalogManager, String token)
            throws CatalogException, ToolException {
        // Link the generated file under the job folder
        // NOTE(review): the boolean argument of link() presumably requests creation of parent folders - confirm
        FileLinkParams fileLinkParams = new FileLinkParams()
                .setUri(outPath.toString())
                .setPath(Paths.get(jobId).resolve(outPath.getFileName()).toString());
        OpenCGAResult<File> fileResult = catalogManager.getFileManager().link(study, fileLinkParams, true, token);
        if (fileResult.getNumResults() != 1) {
            throw new ToolException("It could not link OpenCGA file catalog file for '" + outPath + "'");
        }
        File outCatalogFile = fileResult.first();

        // Updating file: samples, related file
        FileUpdateParams updateParams = new FileUpdateParams()
                .setSampleIds(bamCatalogFile.getSampleIds())
                .setRelatedFiles(Collections.singletonList(new SmallRelatedFileParams()
                        .setFile(bamCatalogFile.getId())
                        .setRelation(FileRelatedFile.Relation.ALIGNMENT)));

        OpenCGAResult<File> updateResult;
        try {
            updateResult = catalogManager.getFileManager().update(study, outCatalogFile.getId(), updateParams, null, token);
        } catch (CatalogException e) {
            unlinkAfterFailure(catalogManager, study, outCatalogFile.getId(), token, e);
            throw e;
        }

        // Checked outside the try block so that the cleanup below can never be caught and repeated
        if (updateResult.getNumUpdated() != 1) {
            ToolException updateFailure = new ToolException("It could not update OpenCGA file catalog (" + outCatalogFile.getId()
                    + ") from alignment file ID '" + bamCatalogFile.getId() + "'");
            unlinkAfterFailure(catalogManager, study, outCatalogFile.getId(), token, updateFailure);
            throw updateFailure;
        }
        return outCatalogFile;
    }

    /**
     * Unlinks a file as cleanup after a failure, recording an unlink error on {@code failure} as a suppressed exception
     * so that the original failure is the one reported to the caller.
     */
    private static void unlinkAfterFailure(CatalogManager catalogManager, String study, String fileId, String token,
                                           Exception failure) {
        try {
            catalogManager.getFileManager().unlink(study, fileId, token);
        } catch (CatalogException cleanupError) {
            failure.addSuppressed(cleanupError);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Copyright 2015-2020 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.opencb.opencga.analysis.alignment;

/**
 * File-name extensions (including the leading dot) used by the alignment analyses.
 */
public class AlignmentConstants {

    // Alignment files
    /** Extension of BAM files. */
    public static final String BAM_EXTENSION = ".bam";
    /** Extension of CRAM files. */
    public static final String CRAM_EXTENSION = ".cram";

    // Companion files of the alignment files above
    /** Extension of BAI files. */
    public static final String BAI_EXTENSION = ".bai";
    /** Extension of CRAI files. */
    public static final String CRAI_EXTENSION = ".crai";

    // Coverage files
    /** Extension of BIGWIG files. */
    public static final String BIGWIG_EXTENSION = ".bw";
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,18 @@

package org.opencb.opencga.analysis.alignment;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.commons.datastore.core.ObjectMap;
import org.opencb.commons.datastore.core.QueryOptions;
import org.opencb.opencga.analysis.tools.OpenCgaToolScopeStudy;
import org.opencb.opencga.analysis.wrappers.deeptools.DeeptoolsWrapperAnalysisExecutor;
import org.opencb.opencga.catalog.exceptions.CatalogException;
import org.opencb.opencga.core.exceptions.ToolException;
import org.opencb.opencga.core.models.alignment.CoverageIndexParams;
import org.opencb.opencga.core.models.common.Enums;
import org.opencb.opencga.core.models.file.File;
import org.opencb.opencga.core.response.OpenCGAResult;
import org.opencb.opencga.core.tools.annotations.Tool;
import org.opencb.opencga.core.tools.annotations.ToolParams;

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
Expand All @@ -42,18 +39,17 @@
@Tool(id = AlignmentCoverageAnalysis.ID, resource = Enums.Resource.ALIGNMENT, description = "Alignment coverage analysis.")
public class AlignmentCoverageAnalysis extends OpenCgaToolScopeStudy {

public final static String ID = "coverage-index-run";
public final static String DESCRIPTION = "Compute the coverage from a given alignment file, e.g., create a .bw file from a .bam file";
public static final String ID = "coverage-index-run";
public static final String DESCRIPTION = "Compute the coverage from a given alignment file, e.g., create a "
+ AlignmentConstants.BIGWIG_EXTENSION + " file from a " + AlignmentConstants.BAM_EXTENSION + " file";

@ToolParams
protected final CoverageIndexParams coverageParams = new CoverageIndexParams();

private File bamCatalogFile;
private Path inputPath;

private Path bwCatalogPath;
private Path outputPath;
private File baiCatalogFile;

@Override
protected void check() throws Exception {
super.check();

Expand All @@ -62,55 +58,89 @@ protected void check() throws Exception {
throw new ToolException("Missing study when computing alignment coverage");
}

OpenCGAResult<File> fileResult;
// Checking BAM file ID
try {
logger.info("{}: checking file {}", ID, coverageParams.getFile());
fileResult = catalogManager.getFileManager().get(getStudy(), coverageParams.getFile(), QueryOptions.empty(), getToken());
} catch (CatalogException e) {
throw new ToolException("Error accessing file '" + coverageParams.getFile() + "' of the study " + getStudy() + "'", e);
bamCatalogFile = catalogManager.getFileManager().get(getStudy(), coverageParams.getBamFileId(), QueryOptions.empty(),
getToken()).first();
if (bamCatalogFile == null) {
throw new ToolException("Could not find BAM file from ID '" + coverageParams.getBamFileId() + "'");
}
} catch (Exception e) {
throw new ToolException("Could not get BAM file from ID " + coverageParams.getBamFileId());
}
if (fileResult.getNumResults() <= 0) {
throw new ToolException("File '" + coverageParams.getFile() + "' not found in study '" + getStudy() + "'");

// Check if the input file is .bam
if (!bamCatalogFile.getName().endsWith(AlignmentConstants.BAM_EXTENSION)) {
throw new ToolException("Invalid input alignment file '" + coverageParams.getBamFileId() + "' (" + bamCatalogFile.getName()
+ "): it must be in BAM format");
}

bamCatalogFile = fileResult.getResults().get(0);
inputPath = Paths.get(bamCatalogFile.getUri());
String filename = inputPath.getFileName().toString();
// Getting BAI file
String baiFileId = coverageParams.getBaiFileId();
if (StringUtils.isEmpty(baiFileId)) {
// BAI file ID was not provided, looking for it
logger.info("BAI file ID was not provided, getting it from the internal alignment index of the BAM file ID {}",
bamCatalogFile.getId());
try {
baiFileId = bamCatalogFile.getInternal().getAlignment().getIndex().getFileId();
} catch (Exception e) {
throw new ToolException("Could not get internal alignment index file Id from BAM file ID '" + bamCatalogFile.getId());
}
}
try {
baiCatalogFile = catalogManager.getFileManager().get(getStudy(), baiFileId, QueryOptions.empty(), getToken()).first();
if (baiCatalogFile == null) {
throw new ToolException("Could not find BAI file from ID '" + coverageParams.getBaiFileId() + "'");
}
} catch (Exception e) {
throw new ToolException("Could not get BAI file from file ID " + baiFileId);
}

// Check if the input file is .bam
if (!filename.endsWith(".bam")) {
throw new ToolException("Invalid input alignment file '" + coverageParams.getFile() + "': it must be in BAM format");
logger.info("BAI file ID = {}; path = {}", baiCatalogFile.getId(), Paths.get(baiCatalogFile.getUri()));

// Checking filenames
if (!baiCatalogFile.getName().equals(bamCatalogFile.getName() + AlignmentConstants.BAI_EXTENSION)) {
throw new ToolException("Filenames mismatch, BAI file name must consist of BAM file name plus the extension "
+ AlignmentConstants.BAI_EXTENSION + "; BAM filename = " + bamCatalogFile.getName() + ", BAI filename = "
+ baiCatalogFile.getName());
}

// Sanity check: window size
logger.info("{}: checking window size {}", ID, coverageParams.getWindowSize());
logger.info("Checking window size {}", coverageParams.getWindowSize());
if (coverageParams.getWindowSize() <= 0) {
coverageParams.setWindowSize(Integer.parseInt(COVERAGE_WINDOW_SIZE_DEFAULT));
logger.info("{}: window size is set to {}", ID, coverageParams.getWindowSize());
}

// Path where the BW file will be created
outputPath = getOutDir().resolve(filename + ".bw");

// Check if BW exists already, and then check the flag 'overwrite'
bwCatalogPath = Paths.get(inputPath.toFile().getParent()).resolve(outputPath.getFileName());
if (bwCatalogPath.toFile().exists() && !coverageParams.isOverwrite()) {
// Nothing to do
throw new ToolException("Nothing to do: coverage file (" + bwCatalogPath + ") already exists and you set the flag 'overwrite'"
+ " to false");
logger.info("Window size is set to {}", coverageParams.getWindowSize());
}
}

@Override
protected void run() throws Exception {
setUpStorageEngineExecutor(study);

logger.info("{}: running with parameters {}", ID, coverageParams);
logger.info("Running with parameters {}", coverageParams);

step(() -> {

// Path where the BW file will be created
Path bwPath = getOutDir().resolve(bamCatalogFile.getName() + AlignmentConstants.BIGWIG_EXTENSION);

// "deeptools bamCoverage" requires the BAM and BAI files to be located in the same folder.
// If they are in different folders, both files are temporarily symbolic-linked into the job dir to compute the BW file;
// the symbolic links are deleted from the job dir afterwards
Path bamPath = Paths.get(bamCatalogFile.getUri()).toAbsolutePath();
Path baiPath = Paths.get(baiCatalogFile.getUri()).toAbsolutePath();
if (!bamPath.getParent().toString().equals(baiPath.getParent().toString())) {
logger.info("BAM and BAI files must be symbolic-linked in the job dir since they are in different directories: {} and {}",
bamPath, baiPath);
bamPath = getOutDir().resolve(bamCatalogFile.getName()).toAbsolutePath();
baiPath = getOutDir().resolve(baiCatalogFile.getName()).toAbsolutePath();
Files.createSymbolicLink(bamPath, Paths.get(bamCatalogFile.getUri()).toAbsolutePath());
Files.createSymbolicLink(baiPath, Paths.get(baiCatalogFile.getUri()).toAbsolutePath());
}

Map<String, String> bamCoverageParams = new HashMap<>();
bamCoverageParams.put("b", inputPath.toAbsolutePath().toString());
bamCoverageParams.put("o", outputPath.toAbsolutePath().toString());
bamCoverageParams.put("b", bamPath.toString());
bamCoverageParams.put("o", bwPath.toAbsolutePath().toString());
bamCoverageParams.put("binSize", String.valueOf(coverageParams.getWindowSize()));
bamCoverageParams.put("outFileFormat", "bigwig");
bamCoverageParams.put("minMappingQuality", "20");
Expand All @@ -124,38 +154,39 @@ protected void run() throws Exception {
.setCommand("bamCoverage")
.execute();

// Check execution result
if (!outputPath.toFile().exists()) {
new ToolException("Something wrong happened running a coverage: BigWig file (" + outputPath.toFile().getName()
+ ") was not create, please, check log files.");
// Remove symbolic links if necessary
if (getOutDir().resolve(bamCatalogFile.getName()).toFile().exists()) {
Files.delete(getOutDir().resolve(bamCatalogFile.getName()));
}
if (getOutDir().resolve(baiCatalogFile.getName()).toFile().exists()) {
Files.delete(getOutDir().resolve(baiCatalogFile.getName()));
}

// Move the BW file to the same directory where the BAM file is located
logger.info("{}: moving coverage file {} to the same directory where the BAM file is located", ID,
bwCatalogPath.toFile().getName());
if (bwCatalogPath.toFile().exists()) {
bwCatalogPath.toFile().delete();
// Check execution result
if (!bwPath.toFile().exists()) {
throw new ToolException("Something wrong happened running a coverage: BigWig file (" + bwPath.toFile().getName()
+ ") was not create, please, check log files.");
}
FileUtils.moveFile(outputPath.toFile(), bwCatalogPath.toFile());

// And finally, link the BW file is necessary
boolean isLinked = true;
Path outputCatalogPath = Paths.get(bamCatalogFile.getPath()).getParent().resolve(outputPath.getFileName());
OpenCGAResult<File> fileResult;
// Try to copy the BW file into the BAM file directory
Path targetPath = Paths.get(bamCatalogFile.getUri()).getParent().resolve(bwPath.getFileName());
try {
fileResult = catalogManager.getFileManager().get(getStudy(), outputCatalogPath.toString(), QueryOptions.empty(),
getToken());
if (fileResult.getNumResults() <= 0) {
isLinked = false;
}
} catch (CatalogException e) {
isLinked = false;
Files.move(bwPath, targetPath);
} catch (Exception e) {
// Do nothing
logger.info("Moving from {} to {}: {}", bwPath, targetPath, e.getMessage());
}
if (!isLinked) {
logger.info("{}: linking file {} in catalog", ID, bwCatalogPath.toFile().getName());
catalogManager.getFileManager().link(getStudy(), bwCatalogPath.toUri(), outputCatalogPath.getParent().toString(),
new ObjectMap("parents", true), getToken());

if (targetPath.toFile().exists()) {
bwPath = targetPath;
logger.info("Coverage file was copied into the BAM folder: {}", bwPath);
} else {
logger.info("Couldn't copy the coverage file into the BAM folder. The coverage file is in the job folder instead: {}",
bwPath);
}

// Link generated BIGWIG file and update samples info
AlignmentAnalysisUtils.linkAndUpdate(bamCatalogFile, bwPath, getJobId(), study, catalogManager, token);
});
}
}
Loading

0 comments on commit 76b6794

Please sign in to comment.