Skip to content

Commit

Permalink
tweak berkeley to try to speed up match data build
Browse files Browse the repository at this point in the history
  • Loading branch information
tgrego committed Jun 26, 2024
1 parent 82dcd94 commit e647d2d
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 29 deletions.
4 changes: 2 additions & 2 deletions core/precalcmatches/berkeley/buildFromIprscan.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@ then
fi

#"$JAVA" -Xmx2048M -jar berkeley-db-builder.jar "$@"
"$JAVA" -XX:+UseParallelGC -XX:ParallelGCThreads=4 -XX:+UseCompressedOops -Xms4048M -Xmx22048M -jar berkeley-db-builder.jar "$@"
"$JAVA" -XX:+UseParallelGC -XX:ParallelGCThreads=8 -XX:+UseCompressedOops -Xms16000M -Xmx32000M -jar berkeley-db-builder.jar "$@"

#end
#end
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,22 @@ public void create(String dbStore, File berkeleyDBDirectory) {
// Split *.jdb log files into subdirectories in the env home dir
// test not to
//myEnvConfig.setConfigParam("je.log.nDataDirectories", Integer.toString(numSubDirs));

myEnvConfig.setConfigParam("je.log.fileMax", Integer.toString(210000000)); // 210 MB (~200 MiB)
// worth increasing file size?
// myEnvConfig.setConfigParam("je.log.fileMax", Integer.toString(512000000)); //512M

myEnvConfig.setConfigParam("je.env.runCleaner", Boolean.toString(false));
myEnvConfig.setConfigParam("je.env.runCheckpointer", Boolean.toString(false));

myEnvConfig.setConfigParam("je.checkpointer.bytesInterval", Long.toString(400000000000l)); // 370GB 10000000000l
myEnvConfig.setConfigParam("je.cleaner.minAge", Integer.toString(1000)); // min file age (number of log files behind the active file) before a file qualifies for cleaning
myEnvConfig.setConfigParam("je.log.useODSYNC", Boolean.toString(true));

myEnvConfig.setConfigParam("je.maxMemory", Long.toString(25769803776l)); // 24GB
myEnvConfig.setConfigParam("je.tree.minMemory", Integer.toString(512000000));
myEnvConfig.setConfigParam("je.env.verifyBtreeBatchSize", Integer.toString(10000));

storeConfig.setAllowCreate(true);
storeConfig.setTransactional(false);
storeConfig.setDeferredWrite(true); //but remember to write to disk every so often
Expand Down Expand Up @@ -85,5 +93,10 @@ public void close(){
/**
 * Accessor for the entity store held by this object.
 *
 * @return the current value of {@code entityStore}
 */
public EntityStore getEntityStore() {
    return this.entityStore;
}

/**
 * Accessor for the Berkeley DB environment held by this object.
 *
 * @return the current value of {@code myEnv}
 */
public Environment getEnvironment() {
    return this.myEnv;
}

}

Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import com.sleepycat.persist.EntityStore;
import com.sleepycat.persist.PrimaryIndex;

import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentMutableConfig;

import uk.ac.ebi.interpro.scan.model.SignatureLibrary;
import uk.ac.ebi.interpro.scan.precalc.berkeley.conversion.toi5.SignatureLibraryLookup;
Expand Down Expand Up @@ -38,24 +40,11 @@ public class CreateMatchDBFromIprscanBerkeleyDB {
private static String QUERY_ENABLE_DML = "alter session enable parallel dml";

private static String QUERY_TEMPORARY_TABLE =
"select /*+ PARALLEL (8) */ PROTEIN_MD5, SIGNATURE_LIBRARY_NAME, SIGNATURE_LIBRARY_RELEASE, " +
"SIGNATURE_ACCESSION, MODEL_ACCESSION, SEQ_START, SEQ_END, FRAGMENTS, SEQUENCE_SCORE, SEQUENCE_EVALUE, " +
"HMM_BOUNDS, HMM_START, HMM_END, HMM_LENGTH, ENVELOPE_START, ENVELOPE_END, SCORE, EVALUE," +
"SEQ_FEATURE" +
" from lookup_tmp_tab partition (partitionName) " +
" order by upi_range, PROTEIN_MD5";
//" where upi_range = ? " +
//" order by PROTEIN_MD5";

//" from lookup_tmp_tab " +
// " where upi_range = ? " +
// " order by upi_range, PROTEIN_MD5";
/*
" from lookup_tmp_tab partition (partitionName) " +
" where upi_range = ? " +
" order by PROTEIN_MD5";
*/

"SELECT /*+ PARALLEL (8) */ PROTEIN_MD5, SIGNATURE_LIBRARY_NAME, SIGNATURE_LIBRARY_RELEASE, " +
"SIGNATURE_ACCESSION, MODEL_ACCESSION, SEQ_START, SEQ_END, FRAGMENTS, SEQUENCE_SCORE, SEQUENCE_EVALUE, " +
"HMM_BOUNDS, HMM_START, HMM_END, HMM_LENGTH, ENVELOPE_START, ENVELOPE_END, SCORE, EVALUE, SEQ_FEATURE " +
"FROM lookup_tmp_tab PARTITION (partitionName) " +
"ORDER BY PROTEIN_MD5";

public static void main(String[] args) {
if (args.length < 4) {
Expand Down Expand Up @@ -124,6 +113,15 @@ void buildDatabase(String directoryPath, String databaseUrl, String username, St
System.out.println("Create the Berkeley DB Store and populate ... ");
try (BerkeleyDBStore lookupMatchDB = new BerkeleyDBStore()){
lookupMatchDB.create(dbStoreName, lookupMatchDBDirectory);

Environment env = lookupMatchDB.getEnvironment();
EnvironmentMutableConfig mutableConfig = env.getMutableConfig();

mutableConfig.setTxnNoSync(true);
// mutableConfig.setTxnWriteNoSync(true);

env.setMutableConfig(mutableConfig);

if (primIDX == null) {
primIDX = lookupMatchDB.getEntityStore().getPrimaryIndex(Long.class, KVSequenceEntry.class);
}
Expand All @@ -147,15 +145,10 @@ void buildDatabase(String directoryPath, String databaseUrl, String username, St
System.out.println(Utilities.getTimeNow() + " sql for this partition: " + partitionQueryLookupTable);
try (PreparedStatement ps = connection.prepareStatement(partitionQueryLookupTable)) {
//should we play with the fetch array size
System.out.println(Utilities.getTimeNow() + " old FetchSize: " + ps.getFetchSize());
//System.out.println(Utilities.getTimeNow() + " old FetchSize: " + ps.getFetchSize());
ps.setFetchSize(fetchSize);
System.out.println(Utilities.getTimeNow() + " new FetchSize: " + ps.getFetchSize());
//ps.setString(1, partitionName);

//ps.setString(1, partitionName);
//ps.setString(1, partitionName);
//System.out.println(Utilities.getTimeNow() + " new FetchSize: " + ps.getFetchSize());

//ps.setString(2, partitionName);
//System.out.println(Utilities.getTimeNow() + "sql:" + ps.toString());
try (ResultSet rs = ps.executeQuery()) {
long endExecuteQueryMillis = System.currentTimeMillis();
Expand Down Expand Up @@ -265,7 +258,7 @@ void buildDatabase(String directoryPath, String databaseUrl, String username, St
String kvMatch = kvMatchJoiner.toString();

if (matchCount == 0) {
System.out.println(Utilities.getTimeNow() + " match 0: " + kvMatch.toString());
//System.out.println(Utilities.getTimeNow() + " match 0: " + kvMatch.toString());
}
if (match == null) {
match = new KVSequenceEntry();
Expand All @@ -285,7 +278,7 @@ void buildDatabase(String directoryPath, String databaseUrl, String username, St
matchCount++;
partitionMatchCount++;
if (partitionMatchCount == 1) {
System.out.println(Utilities.getTimeNow() + " match 1: " + match.toString());
//System.out.println(Utilities.getTimeNow() + " match 1: " + match.toString());
}
if (matchCount % 2000000 == 0) {
if (matchCount % 6000000 == 0) {
Expand Down

0 comments on commit e647d2d

Please sign in to comment.