Skip to content

Commit

Permalink
lib: add DataReleaseSingleton and JUnit tests, #TASK-6565
Browse files Browse the repository at this point in the history
On branch TASK-6565
Changes to be committed:
	new file:   cellbase-lib/src/main/java/org/opencb/cellbase/lib/impl/core/DataReleaseSingleton.java
	new file:   cellbase-lib/src/test/java/org/opencb/cellbase/lib/impl/core/DataReleaseSingletonTest.java
  • Loading branch information
jtarraga committed Jul 18, 2024
1 parent 5943afc commit f9a4f45
Show file tree
Hide file tree
Showing 2 changed files with 243 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* Copyright 2015-2020 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.opencb.cellbase.lib.impl.core;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.opencb.cellbase.core.config.CellBaseConfiguration;
import org.opencb.cellbase.core.config.SpeciesConfiguration;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.models.DataRelease;
import org.opencb.cellbase.lib.db.MongoDBManager;
import org.opencb.cellbase.lib.managers.CellBaseManagerFactory;
import org.opencb.commons.datastore.mongodb.MongoDBCollection;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.locks.ReentrantReadWriteLock;

/**
 * Process-wide cache of the MongoDB collections registered for each data release of each configured database.
 *
 * <p>The set of known databases is fixed at construction time from the vertebrate species/assemblies found in the
 * CellBase configuration. For every database the cache maps a release number to a map from data name to the
 * {@link MongoDBCollection} holding that data. The cache for a database is (re)loaded from MongoDB whenever a requested
 * release or data name is not found in it.
 *
 * <p>Call {@link #initialize(CellBaseManagerFactory)} once before using {@link #getInstance()}.
 */
public final class DataReleaseSingleton {

    // The four maps below share the same key set (the database names). They are populated in the constructor only;
    // afterwards the only write is replacing the value of an existing key in cachedData, which is not a structural
    // modification of the HashMap.

    // {key = dbname, value = species}
    private final Map<String, String> speciesMap = new HashMap<>();
    // {key = dbname, value = assembly}
    private final Map<String, String> assemblyMap = new HashMap<>();
    // {key = dbname, value = lock}
    private final Map<String, ReentrantReadWriteLock> rwLockMap = new HashMap<>();

    // {key = dbname, value = { key = release, value = { key = data, value = collection } } }
    // The per-database value is never mutated after being stored: loadData() builds a new map and swaps it in.
    private final Map<String, Map<Integer, Map<String, MongoDBCollection>>> cachedData = new HashMap<>();

    private final CellBaseManagerFactory managerFactory;

    // volatile: getInstance() reads this field without holding the class monitor taken by initialize()
    private static volatile DataReleaseSingleton instance;

    public static final String UNKNOWN_DATABASE_MSG_PREFIX = "Unknown database ";
    // Misspelled name kept for backward compatibility; prefer UNKNOWN_DATABASE_MSG_PREFIX
    public static final String UNKOWN_DATABASE_MSG_PREFIX = UNKNOWN_DATABASE_MSG_PREFIX;
    public static final String INVALID_RELEASE_MSG_PREFIX = "Invalid release ";
    public static final String INVALID_DATA_MSG_PREFIX = "Invalid data ";

    // Private constructor to prevent instantiation
    private DataReleaseSingleton(CellBaseManagerFactory managerFactory) {
        this.managerFactory = managerFactory;

        // Support multi species and assemblies
        CellBaseConfiguration configuration = managerFactory.getConfiguration();
        for (SpeciesConfiguration vertebrate : configuration.getSpecies().getVertebrates()) {
            for (SpeciesConfiguration.Assembly assembly : vertebrate.getAssemblies()) {
                String databaseName = MongoDBManager.getDatabaseName(vertebrate.getId(), assembly.getName(), configuration.getVersion());
                // This is necessary, before creating the database name the assembly is "cleaned", and we need to get the data release
                // manager from the species and the assembly
                speciesMap.put(databaseName, vertebrate.getId());
                assemblyMap.put(databaseName, assembly.getName());
                rwLockMap.put(databaseName, new ReentrantReadWriteLock());
                cachedData.put(databaseName, new HashMap<>());
            }
        }
    }

    /**
     * Creates the single instance if it does not exist yet; later calls are no-ops (the first factory wins).
     *
     * @param managerFactory factory providing the configuration and the data release managers
     */
    public static synchronized void initialize(CellBaseManagerFactory managerFactory) {
        if (instance == null) {
            instance = new DataReleaseSingleton(managerFactory);
        }
    }

    /**
     * Returns the single instance of the class.
     *
     * @return the instance created by {@link #initialize(CellBaseManagerFactory)}
     * @throws IllegalStateException if {@link #initialize(CellBaseManagerFactory)} has not been called yet
     */
    public static DataReleaseSingleton getInstance() {
        DataReleaseSingleton current = instance;
        if (current == null) {
            throw new IllegalStateException("Singleton not initialized. Call the function 'initialize' first.");
        }
        return current;
    }

    /**
     * Reads all the data releases of the given database from MongoDB and replaces the cached entry of that database.
     * Must be called while holding the write lock of {@code dbname}.
     *
     * <p>If MongoDB returns no data releases, the currently cached entry is left untouched.
     *
     * @param dbname database name; must be one of the names registered in the constructor
     * @throws CellBaseException if the data releases cannot be retrieved
     */
    private void loadData(String dbname) throws CellBaseException {
        String species = speciesMap.get(dbname);
        String assembly = assemblyMap.get(dbname);
        ReleaseMongoDBAdaptor releaseMongoDBAdaptor = managerFactory.getDataReleaseManager(species, assembly).getReleaseDBAdaptor();
        List<DataRelease> dataReleases = releaseMongoDBAdaptor.getAll().getResults();
        if (CollectionUtils.isNotEmpty(dataReleases)) {
            // Build the whole map first and publish it with a single put, so that a failure halfway through does not
            // leave a partially filled cache behind
            Map<Integer, Map<String, MongoDBCollection>> releaseMap = new HashMap<>();
            for (DataRelease dataRelease : dataReleases) {
                Map<String, MongoDBCollection> collectionMap = new HashMap<>();
                for (Map.Entry<String, String> entry : dataRelease.getCollections().entrySet()) {
                    collectionMap.put(entry.getKey(), releaseMongoDBAdaptor.mongoDataStore.getCollection(entry.getValue()));
                }
                releaseMap.put(dataRelease.getRelease(), collectionMap);
            }
            cachedData.put(dbname, releaseMap);
        }
    }

    /**
     * Checks that the release exists in the given database.
     *
     * @param dbname  database name
     * @param release data release number
     * @throws CellBaseException if the database is unknown or the release does not exist
     */
    public void checkDataRelease(String dbname, int release) throws CellBaseException {
        checkDataRelease(dbname, release, null);
    }

    /**
     * Checks that the release exists in the given database and, when {@code data} is not empty, that the release
     * contains that data.
     *
     * @param dbname  database name
     * @param release data release number
     * @param data    data name (e.g. "gene"); null or empty to check only the release
     * @throws CellBaseException if the database is unknown, the release does not exist, or the data is not part of
     *                           the release
     */
    public void checkDataRelease(String dbname, int release, String data) throws CellBaseException {
        getCheckedReleases(dbname, release, data);
    }

    /**
     * Returns the collection that stores the given data for the given release.
     *
     * @param dbname  database name
     * @param data    data name (e.g. "gene")
     * @param release data release number
     * @return the MongoDB collection; null when {@code data} is null or empty and no collection is registered under
     *         that key
     * @throws CellBaseException if the database is unknown, the release does not exist, or the data is not part of
     *                           the release
     */
    public MongoDBCollection getMongoDBCollection(String dbname, String data, int release) throws CellBaseException {
        // Validate and read from the same snapshot, so a concurrent reload cannot slip in between check and lookup
        return getCheckedReleases(dbname, release, data).get(release).get(data);
    }

    /**
     * Returns the cached releases of a database after making sure they contain the requested release (and data),
     * reloading them from MongoDB if the request cannot be satisfied from the cache.
     */
    private Map<Integer, Map<String, MongoDBCollection>> getCheckedReleases(String dbname, int release, String data)
            throws CellBaseException {
        ReentrantReadWriteLock rwLock = rwLockMap.get(dbname);
        if (rwLock == null) {
            // rwLockMap and cachedData share their keys, so a missing lock means the database is not configured
            String msg = UNKOWN_DATABASE_MSG_PREFIX + dbname;
            throw new CellBaseException(msg);
        }

        // Fast path: take a snapshot of the cached releases under the read lock
        Map<Integer, Map<String, MongoDBCollection>> releases;
        rwLock.readLock().lock();
        try {
            releases = cachedData.get(dbname);
        } finally {
            rwLock.readLock().unlock();
        }

        if (!isCached(releases, release, data)) {
            // Slow path: reload under the write lock, re-checking first in case another thread already reloaded
            rwLock.writeLock().lock();
            try {
                releases = cachedData.get(dbname);
                if (!isCached(releases, release, data)) {
                    loadData(dbname);
                    releases = cachedData.get(dbname);
                }
            } finally {
                rwLock.writeLock().unlock();
            }
        }

        // Check after loading
        Map<String, MongoDBCollection> collections = releases.get(release);
        if (collections == null) {
            // If the data release is invalid, throw an exception
            String msg = INVALID_RELEASE_MSG_PREFIX + release + ". The available data releases are: " + releases.keySet();
            throw new CellBaseException(msg);
        }
        if (StringUtils.isNotEmpty(data) && !collections.containsKey(data)) {
            // If the data is not part of the release, throw an exception
            String msg = INVALID_DATA_MSG_PREFIX + " '" + data + "', it's not present in release " + release + ". The available data are: "
                    + collections.keySet();
            throw new CellBaseException(msg);
        }
        return releases;
    }

    /** Tells whether the release (and the data, when not empty) is present in the given per-database cache. */
    private static boolean isCached(Map<Integer, Map<String, MongoDBCollection>> releases, int release, String data) {
        Map<String, MongoDBCollection> collections = releases.get(release);
        return collections != null && (StringUtils.isEmpty(data) || collections.containsKey(data));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package org.opencb.cellbase.lib.impl.core;

import com.fasterxml.jackson.core.JsonProcessingException;
import org.hamcrest.CoreMatchers;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.opencb.biodata.models.core.Gene;
import org.opencb.cellbase.core.api.GeneQuery;
import org.opencb.cellbase.core.api.query.QueryException;
import org.opencb.cellbase.core.exception.CellBaseException;
import org.opencb.cellbase.core.result.CellBaseDataResult;
import org.opencb.cellbase.lib.GenericMongoDBAdaptorTest;
import org.opencb.cellbase.lib.db.MongoDBManager;
import org.opencb.cellbase.lib.managers.GeneManager;
import org.opencb.commons.datastore.mongodb.MongoDBCollection;
import org.opencb.commons.datastore.mongodb.MongoDataStore;

import java.util.HashMap;
import java.util.Map;
import java.util.stream.Collectors;

import static org.hamcrest.MatcherAssert.assertThat;
import static org.junit.jupiter.api.Assertions.*;
import static org.opencb.cellbase.core.ParamConstants.DATA_RELEASE_PARAM;

/**
 * Tests for {@link DataReleaseSingleton}. They assume the singleton has already been initialized and that the test
 * database contains data release 1 with a "gene" collection (presumably set up by {@link GenericMongoDBAdaptorTest};
 * verify against that class).
 */
class DataReleaseSingletonTest extends GenericMongoDBAdaptorTest {

    private String dbname;

    @BeforeEach
    public void init() throws CellBaseException {
        dbname = MongoDBManager.getDatabaseName(SPECIES, ASSEMBLY, cellBaseConfiguration.getVersion());
    }

    /** Searches a gene through the manager layer, pinning the query to data release 1. */
    @Test
    public void testChangeCollectionMap() throws CellBaseException, QueryException, IllegalAccessException, JsonProcessingException {
        GeneManager geneManager = cellBaseManagerFactory.getGeneManager(SPECIES, ASSEMBLY);

        Map<String, String> paramMap = new HashMap<>();
        paramMap.put("id", "ENSG00000248746");
        paramMap.put("include", "id,name,start,end");
        paramMap.put(DATA_RELEASE_PARAM, "1");

        GeneQuery geneQuery = new GeneQuery(paramMap);
        geneQuery.setCount(Boolean.TRUE);

        CellBaseDataResult<Gene> cellBaseDataResult = geneManager.search(geneQuery);
        // WARNING: these values below may slightly change from one data version to another
        assertEquals(1, cellBaseDataResult.getNumMatches());
        assertThat(cellBaseDataResult.getResults().stream().map(Gene::getName).collect(Collectors.toList()),
                CoreMatchers.hasItems("ACTN3"));
    }

    /** A known database, release and data name must resolve to a collection. */
    @Test
    public void testDataReleaseSingleton() throws CellBaseException {
        MongoDBCollection collection = DataReleaseSingleton.getInstance().getMongoDBCollection(dbname, "gene", 1);
        assertNotNull(collection);
    }

    /** An unknown database name must be rejected. */
    @Test
    public void testCheckDatabaseFail() throws CellBaseException {
        CellBaseException exception = assertThrows(CellBaseException.class, () -> DataReleaseSingleton.getInstance()
                .checkDataRelease("toto", 10));
        assertTrue(exception.getMessage().startsWith(DataReleaseSingleton.UNKOWN_DATABASE_MSG_PREFIX));
    }

    /** A release that does not exist in a known database must be rejected. */
    @Test
    public void testCheckReleaseFail() throws CellBaseException {
        CellBaseException exception = assertThrows(CellBaseException.class, () -> DataReleaseSingleton.getInstance()
                .checkDataRelease(dbname, 10));
        assertTrue(exception.getMessage().startsWith(DataReleaseSingleton.INVALID_RELEASE_MSG_PREFIX));
    }

    /** An existing release must pass the check without throwing. */
    @Test
    public void testCheckReleasePass() throws CellBaseException {
        DataReleaseSingleton.getInstance().checkDataRelease(dbname, 1);
    }

    /** A data name that is not part of an existing release must be rejected. */
    @Test
    public void testCheckDataFail() throws CellBaseException {
        CellBaseException exception = assertThrows(CellBaseException.class, () -> DataReleaseSingleton.getInstance()
                .checkDataRelease(dbname, 1, "toto"));
        assertTrue(exception.getMessage().startsWith(DataReleaseSingleton.INVALID_DATA_MSG_PREFIX));
    }

    /** A data name that is part of an existing release must pass the check without throwing. */
    @Test
    public void testCheckDataPass() throws CellBaseException {
        DataReleaseSingleton.getInstance().checkDataRelease(dbname, 1, "gene");
    }
}

0 comments on commit f9a4f45

Please sign in to comment.