Skip to content

Commit

Permalink
init single-cell core
Browse files: browse the repository at this point in the history
  • Loading branch information
MatthijsPon committed Nov 14, 2024
1 parent 5e0f5bc commit 99608a2
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 3 deletions.
9 changes: 9 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
root = true

[*]
end_of_line = lf
insert_final_newline = true

[*.{java,py}]
indent_style = space
indent_size = 4
14 changes: 14 additions & 0 deletions scripts/importer/cbioportal_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class MetaFileTypes(object):
CNA_CONTINUOUS = 'meta_contCNA'
SEG = 'meta_segment'
EXPRESSION = 'meta_expression'
SINGLE_CELL_EXPRESSION = 'meta_single_cell_expression'
MUTATION = 'meta_mutations_extended'
MUTATION_UNCALLED = 'meta_mutations_uncalled'
METHYLATION = 'meta_methylation'
Expand Down Expand Up @@ -214,6 +215,18 @@ class MetaFileTypes(object):
'data_filename': True,
'gene_panel': False
},
MetaFileTypes.SINGLE_CELL_EXPRESSION: {
'cancer_study_identifier': True,
'genetic_alteration_type': True,
'datatype': True,
'stable_id': True,
'source_stable_id': False,
'show_profile_in_analysis_tab': True,
'profile_name': True,
'profile_description': True,
'data_filename': True,
'gene_panel': False
},
MetaFileTypes.METHYLATION: {
'cancer_study_identifier': True,
'genetic_alteration_type': True,
Expand Down Expand Up @@ -399,6 +412,7 @@ class MetaFileTypes(object):
MetaFileTypes.CNA_CONTINUOUS: "org.mskcc.cbio.portal.scripts.ImportProfileData",
MetaFileTypes.SEG: "org.mskcc.cbio.portal.scripts.ImportCopyNumberSegmentData",
MetaFileTypes.EXPRESSION: "org.mskcc.cbio.portal.scripts.ImportProfileData",
MetaFileTypes.SINGLE_CELL_EXPRESSION: "org.mskcc.cbio.portal.scripts.ImportProfileData",
MetaFileTypes.MUTATION: "org.mskcc.cbio.portal.scripts.ImportProfileData",
MetaFileTypes.MUTATION_UNCALLED: "org.mskcc.cbio.portal.scripts.ImportProfileData",
MetaFileTypes.METHYLATION: "org.mskcc.cbio.portal.scripts.ImportProfileData",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Copyright (c) 2024 The Hyve B.V.
* This code is licensed under the GNU Affero General Public License (AGPL),
* version 3, or (at your option) any later version.
*/

/*
* This file is part of cBioPortal.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

/*
* @author Matthijs Pon
*/

package org.mskcc.cbio.portal.dao;

import java.sql.*;
import org.mskcc.cbio.portal.model.*;

/**
 * Data access object for the {@code single_cell_expression} table.
 *
 * <p>Static utility class; it cannot be instantiated.
 */
public final class DaoSingleCellExpression {

    /** Parameterized insert; one row is written per (sample, value) pair. */
    private static final String INSERT_SQL = "INSERT INTO single_cell_expression "
            + "(GENETIC_PROFILE_ID, SAMPLE_ID, TISSUE, CELL_TYPE, ENTREZ_GENE_ID, EXPRESSION_VALUE) VALUES (?,?,?,?,?,?)";

    private DaoSingleCellExpression() {
    }

    /**
     * Inserts single-cell expression values for one gene, tissue and cell type into the
     * {@code single_cell_expression} table, one row per sample, as a single JDBC batch.
     *
     * @param geneticProfileId value written to GENETIC_PROFILE_ID for every row
     * @param entrezGeneId     value written to ENTREZ_GENE_ID for every row
     * @param tissue           value written to TISSUE for every row
     * @param cellType         value written to CELL_TYPE for every row
     * @param sampleIds        sample ids, one per row; each must parse as an integer
     * @param values           expression values, parallel to {@code sampleIds}; each must parse as a float
     * @throws DaoException             if the database reports an SQL error
     * @throws IllegalArgumentException if {@code sampleIds} and {@code values} differ in length
     * @throws NumberFormatException    if a sample id or value is not numeric
     */
    public static void addSingleCellExpression(int geneticProfileId, int entrezGeneId, String tissue, String cellType, String[] sampleIds, String[] values) throws DaoException {
        if (sampleIds.length != values.length) {
            throw new IllegalArgumentException("Number of sample ids (" + sampleIds.length
                    + ") does not match number of expression values (" + values.length + ")");
        }
        if (sampleIds.length == 0) {
            // Nothing to insert; avoid opening a connection for an empty batch.
            return;
        }

        Connection connection = null;
        PreparedStatement preparedStatement = null;

        try {
            // Register the connection under this DAO's own class; the same class is used on close.
            connection = JdbcUtil.getDbConnection(DaoSingleCellExpression.class);
            preparedStatement = connection.prepareStatement(INSERT_SQL);

            for (int i = 0; i < sampleIds.length; i++) {
                preparedStatement.setInt(1, geneticProfileId);
                // NOTE(review): SAMPLE_ID is bound as an integer, so the ids are assumed to be
                // numeric internal sample ids passed as strings; confirm against the table schema.
                preparedStatement.setInt(2, Integer.parseInt(sampleIds[i]));
                preparedStatement.setString(3, tissue);
                preparedStatement.setString(4, cellType);
                preparedStatement.setInt(5, entrezGeneId);
                preparedStatement.setFloat(6, Float.parseFloat(values[i]));
                preparedStatement.addBatch();
            }

            preparedStatement.executeBatch();
        } catch (SQLException e) {
            throw new DaoException(e);
        } finally {
            // An INSERT produces no result set, hence the null last argument.
            JdbcUtil.closeAll(DaoSingleCellExpression.class, connection, preparedStatement, null);
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,6 @@ public enum GeneticAlterationType {
PROTEIN_ARRAY_PROTEIN_LEVEL,
PROTEIN_ARRAY_PHOSPHORYLATION,
GENESET_SCORE,
GENERIC_ASSAY
GENERIC_ASSAY,
SINGLE_CELL_EXPRESSION
};
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ public void run() {
);
importer.importData();
} else {
// All other files go through this
ImportTabDelimData importer = new ImportTabDelimData(
dataFile,
geneticProfile.getTargetLine(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ public ImportTabDelimData(
* @param geneticProfileId GeneticProfile ID.
* @param isIncrementalUpdateMode if true, update/append data to the existing one
*
* @deprecated : TODO shall we deprecate this feature (i.e. the targetLine)?
* //@deprecated : TODO shall we deprecate this feature (i.e. the targetLine)?
*/
public ImportTabDelimData(
File dataFile,
Expand Down Expand Up @@ -197,6 +197,7 @@ public void importData() {
return null;
});
}
// TODO: update this function
private void doImportData() throws IOException, DaoException {
try {
this.numLines = FileUtil.getNumLines(dataFile);
Expand All @@ -222,6 +223,8 @@ private void doImportData() throws IOException, DaoException {
boolean isGenericAssayProfile = geneticProfile != null
&& geneticProfile.getGeneticAlterationType() == GeneticAlterationType.GENERIC_ASSAY
&& headerParts[0].equalsIgnoreCase("ENTITY_STABLE_ID");
boolean isSingleCellExpressionProfile = geneticProfile != null
&& geneticProfile.getGeneticAlterationType() == GeneticAlterationType.SINGLE_CELL_EXPRESSION;

long typesDetected = List.of(isDiscretizedCnaProfile, isRppaProfile, isGsvaProfile, isGenericAssayProfile).stream().filter(Boolean::booleanValue).count();
if (typesDetected > 1) {
Expand All @@ -231,11 +234,15 @@ private void doImportData() throws IOException, DaoException {
int numRecordsToAdd = 0;
int samplesSkipped = 0;
try {
// TODO: also add profile somewhere?
int hugoSymbolIndex = getHugoSymbolIndex(headerParts);
int entrezGeneIdIndex = getEntrezGeneIdIndex(headerParts);
int rppaGeneRefIndex = getRppaGeneRefIndex(headerParts);
int genesetIdIndex = getGenesetIdIndex(headerParts);
int sampleStartIndex = getStartIndex(headerParts, hugoSymbolIndex, entrezGeneIdIndex, rppaGeneRefIndex, genesetIdIndex);
List<Integer> singleCellExpressionIndices = getSingleCellExpressionIndices(headerParts);
// TODO: add indices to here
int sampleStartIndex = getStartIndex(headerParts, hugoSymbolIndex, entrezGeneIdIndex, rppaGeneRefIndex, genesetIdIndex,
singleCellExpressionIndices.get(0), singleCellExpressionIndices.get(1));
int genericAssayIdIndex = getGenericAssayIdIndex(headerParts);
if (isRppaProfile) {
if (rppaGeneRefIndex == -1) {
Expand All @@ -250,7 +257,12 @@ private void doImportData() throws IOException, DaoException {
throw new RuntimeException("Error: the following column should be present for this type of data: ENTITY_STABLE_ID");
}
} else if (hugoSymbolIndex == -1 && entrezGeneIdIndex == -1) {
if (isSingleCellExpressionProfile) {
if (singleCellExpressionIndices.stream().anyMatch(n -> (n == -1))) {
throw new RuntimeException("Error the following columns should be present for single cell data: "); // TODO: ad columns
}
throw new RuntimeException("Error: at least one of the following columns should be present: Hugo_Symbol or Entrez_Gene_Id");
}
}

String sampleIds[];
Expand Down Expand Up @@ -366,6 +378,11 @@ private void doImportData() throws IOException, DaoException {
}
if (entrez != null && !DataValidator.isValidNumericSequence(entrez)) {
ProgressMonitor.logWarning("Ignoring line with invalid Entrez_Id " + entrez);
} else if (isSingleCellExpressionProfile) {
String sCExpTissue = rowParts[singleCellExpressionIndices.get(0)];
String sCExpCellType = rowParts[singleCellExpressionIndices.get(1)];
recordAdded = saveSingleCellExpressionLine(sampleValues, entrez, geneSymbol,
sCExpTissue, sCExpCellType);
} else {
String firstCellValue = rowParts[0];
if (targetLine == null || firstCellValue.equals(targetLine)) {
Expand Down Expand Up @@ -894,6 +911,13 @@ private int getRppaGeneRefIndex(String[] headers) {
return getColIndexByName(headers, "Composite.Element.Ref");
}

/**
 * Looks up the header positions of the two single-cell annotation columns.
 *
 * @param headers column names taken from the header line of the data file
 * @return an unmodifiable two-element list: the index found for "Tissue" first, then the
 *         index found for "Cell_Type", each as reported by {@code getColIndexByName}
 */
private List<Integer> getSingleCellExpressionIndices(String[] headers) {
    int tissueIndex = getColIndexByName(headers, "Tissue");
    int cellTypeIndex = getColIndexByName(headers, "Cell_Type");
    return List.of(tissueIndex, cellTypeIndex);
}

// helper function for finding the index of a column by name
private int getColIndexByName(String[] headers, String colName) {
for (int i = 0; i < headers.length; i++) {
Expand Down

0 comments on commit 99608a2

Please sign in to comment.