Skip to content

Commit

Permalink
Include Power User Memory Limit and Test
Browse files Browse the repository at this point in the history
  • Loading branch information
AvocadoMoon committed Oct 11, 2024
1 parent 2266176 commit acd44a5
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 12 deletions.
4 changes: 3 additions & 1 deletion docker/build/Dockerfile-sched-dev
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ ENV softwareVersion=SOFTWARE-VERSION-NOT-SET \
maxPdeJobsPerUser="max-pde-jobs-per-user-not-set" \
htcMinMemoryMB="htc-min-memory-not-set" \
htcMaxMemoryMB="htc-max-memory-not-set" \
htcPowerUserMemoryFloorMB="htc-power-user-memory-floor-not-set"
htcPowerUserMemoryFloorMB="htc-power-user-memory-floor-not-set" \
htcPowerUserMemoryMaxMB="htc-power-user-memory-max-not-set"

ENV dbpswdfile=/run/secrets/dbpswd \
jmspswdfile=/run/secrets/jmspswd \
Expand Down Expand Up @@ -125,4 +126,5 @@ ENTRYPOINT java \
-Dvcell.htc.memory.min.mb=${htcMinMemoryMB} \
-Dvcell.htc.memory.max.mb=${htcMaxMemoryMB} \
-Dvcell.htc.memory.pu.floor.mb=${htcPowerUserMemoryFloorMB} \
-Dvcell.htc.memory.pu.max.mb=${htcPowerUserMemoryMaxMB} \
-cp "./lib/*" cbit.vcell.message.server.dispatcher.SimulationDispatcherMain
7 changes: 6 additions & 1 deletion docker/build/Dockerfile-submit-dev
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,11 @@ ENV softwareVersion=SOFTWARE-VERSION-NOT-SET \
vcell_ssh_cmd_cmdtimeout="cmdSrvcSshCmdTimeoutMS-not-set" \
vcell_ssh_cmd_restoretimeout="cmdSrvcSshCmdRestoreTimeoutFactor-not-set" \
simdatadir_archive_external="simdatadir_archive_external-not-set" \
simdatadir_archive_internal="simdatadir_archive_internal-not-set"
simdatadir_archive_internal="simdatadir_archive_internal-not-set" \
htcMinMemoryMB="htc-min-memory-not-set" \
htcMaxMemoryMB="htc-max-memory-not-set" \
htcPowerUserMemoryFloorMB="htc-power-user-memory-floor-not-set" \
htcPowerUserMemoryMaxMB="htc-power-user-memory-max-not-set"

ENV jmspswdfile=/run/secrets/jmspswd \
jmsrestpswdfile=/run/secrets/jmsrestpswd \
Expand Down Expand Up @@ -173,4 +177,5 @@ ENTRYPOINT java \
-Dvcell.htc.memory.min.mb=${htcMinMemoryMB} \
-Dvcell.htc.memory.max.mb=${htcMaxMemoryMB} \
-Dvcell.htc.memory.pu.floor.mb=${htcPowerUserMemoryFloorMB} \
-Dvcell.htc.memory.pu.max.mb=${htcPowerUserMemoryMaxMB} \
-cp "./lib/*" cbit.vcell.message.server.batch.sim.HtcSimulationWorker
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ public static void setConfigProvider(VCellConfigProvider configProvider) {
public static final String htcMinMemoryMB = record("vcell.htc.memory.min.mb", ValueType.INT); // minimum memory request in MB, currently 4g
public static final String htcMaxMemoryMB = record("vcell.htc.memory.max.mb", ValueType.INT); // maximum memory request in MB
public static final String htcPowerUserMemoryFloorMB = record("vcell.htc.memory.pu.floor.mb", ValueType.INT); // MIN memory allowed if declared to be a power user, currently 50g (Previously Existing Value)
public static final String htcPowerUserMemoryMaxMB = record("vcell.htc.memory.pu.max.mb", ValueType.INT); // MAX memory allowed if declared to be a power user

public static final String htc_vcellfvsolver_docker_name = record("vcell.htc.vcellfvsolver.docker.name",ValueType.GEN);
public static final String htc_vcellfvsolver_solver_list = record("vcell.htc.vcellfvsolver.solver.list",ValueType.GEN);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,8 @@ public static void main(String[] args) throws IOException {
PropertyLoader.slurm_qos_pu,
PropertyLoader.htcMinMemoryMB,
PropertyLoader.htcMaxMemoryMB,
PropertyLoader.htcPowerUserMemoryFloorMB
PropertyLoader.htcPowerUserMemoryFloorMB,
PropertyLoader.htcPowerUserMemoryMaxMB
};


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ public static void main(String[] args) {
PropertyLoader.maxJobsPerScan,
PropertyLoader.maxOdeJobsPerUser,
PropertyLoader.maxPdeJobsPerUser,
PropertyLoader.slurm_partition
PropertyLoader.slurm_partition,
PropertyLoader.htcPowerUserMemoryMaxMB,
PropertyLoader.htcMaxMemoryMB
};

}
Original file line number Diff line number Diff line change
Expand Up @@ -464,11 +464,11 @@ public synchronized void onDispatch(Simulation simulation, SimulationJobStatus o
double estimatedMemMB = simulationTask.getEstimatedMemorySizeMB();
double htcMinMemoryMB = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcMinMemoryMB));
double htcMaxMemoryMB = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcMaxMemoryMB));
double requestedMemoryMB = Math.max(estimatedMemMB, htcMinMemoryMB);

if (isPowerUser){
htcMaxMemoryMB = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcPowerUserMemoryFloorMB));
htcMinMemoryMB = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcPowerUserMemoryFloorMB));
htcMaxMemoryMB = Integer.parseInt(PropertyLoader.getRequiredProperty(PropertyLoader.htcPowerUserMemoryMaxMB));
}
double requestedMemoryMB = Math.max(estimatedMemMB, htcMinMemoryMB);

final SimulationJobStatus newSimJobStatus;
if (requestedMemoryMB > htcMaxMemoryMB) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public class DispatcherTestUtils {
private static String previousHtcMax = "";
private static String previousHtcMin = "";
private static String previousHtcPowerFloor = "";
private static String previousHtcPowerMax = "";
private static String previousMongoBlob = "";
private static String previousJMSIntHostProperty = "";
private static String previousJMSIntPortProperty = "";
Expand Down Expand Up @@ -64,6 +65,9 @@ public static void setRequiredProperties(){
previousHtcPowerFloor = PropertyLoader.getProperty(PropertyLoader.htcPowerUserMemoryFloorMB, "");
PropertyLoader.setProperty(PropertyLoader.htcPowerUserMemoryFloorMB, "51200");

previousHtcPowerMax = PropertyLoader.getProperty(PropertyLoader.htcPowerUserMemoryMaxMB, "");
PropertyLoader.setProperty(PropertyLoader.htcPowerUserMemoryMaxMB, "64000");

previousMongoBlob = PropertyLoader.getProperty(PropertyLoader.jmsBlobMessageUseMongo, "");
PropertyLoader.setProperty(PropertyLoader.jmsBlobMessageUseMongo, "");

Expand Down Expand Up @@ -105,6 +109,7 @@ public static void restoreRequiredProperties(){
PropertyLoader.setProperty(PropertyLoader.htcMaxMemoryMB, previousHtcMax);
PropertyLoader.setProperty(PropertyLoader.htcMinMemoryMB, previousHtcMin);
PropertyLoader.setProperty(PropertyLoader.htcPowerUserMemoryFloorMB, previousHtcPowerFloor);
PropertyLoader.setProperty(PropertyLoader.htcPowerUserMemoryMaxMB, previousHtcPowerMax);
PropertyLoader.setProperty(PropertyLoader.jmsBlobMessageUseMongo, previousMongoBlob);
PropertyLoader.setProperty(PropertyLoader.jmsIntPortInternal, previousJMSIntPortProperty);
PropertyLoader.setProperty(PropertyLoader.jmsIntHostInternal, previousJMSIntHostProperty);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ public class MockSimulationDB implements SimulationDatabase{
private HashMap<String, ArrayList<SimulationJobStatus>> dbTable = new HashMap<>();

// Mock special user "Tom" (key 999) registered in this mock database's user table.
// NOTE(review): the claims argument is an array-creation expression sized by
// admins.ordinal(), not an array initializer containing the admins claim. A newly
// created Java array holds only null elements (or is empty when the ordinal is 0),
// so this user may not actually carry the admins claim. The power-user declaration
// below uses the initializer form `new User.SPECIAL_CLAIM[]{...}` -- confirm whether
// `new User.SPECIAL_CLAIM[]{User.SPECIAL_CLAIM.admins}` was intended here.
public static User.SpecialUser specialAdmin = new User.SpecialUser("Tom", new KeyValue("999"), new User.SPECIAL_CLAIM[User.SPECIAL_CLAIM.admins.ordinal()]);
public static User.SpecialUser specialUser = new User.SpecialUser("Tim", new KeyValue("2"), new User.SPECIAL_CLAIM[]{User.SpecialUser.SPECIAL_CLAIM.powerUsers});
public static User.SpecialUser powerUser = new User.SpecialUser("Tim", new KeyValue("2"), new User.SPECIAL_CLAIM[]{User.SpecialUser.SPECIAL_CLAIM.powerUsers});

private final HashMap<String, User> users = new HashMap<>(){
{put(specialAdmin.getName(), specialAdmin); put(DispatcherTestUtils.alice.getName(), DispatcherTestUtils.alice);
put(specialUser.getName(), specialUser);}
put(powerUser.getName(), powerUser);}
};

private final HashMap<String, Simulation> simulations = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import org.vcell.util.DataAccessException;
import org.vcell.util.document.KeyValue;
import org.vcell.util.document.User;
import org.vcell.util.document.VCellServerID;

import java.beans.PropertyVetoException;
import java.sql.SQLException;
Expand Down Expand Up @@ -143,6 +142,8 @@ public void stateShouldTransitionToFailure() throws SQLException, DataAccessExce

//
Simulation memoryIntensiveSimulation = DispatcherTestUtils.createMockSimulation(900, 900, 900);
Simulation powerMemoryIntensiveSimulation = DispatcherTestUtils.createMockSimulation(9000, 9000, 5000, MockSimulationDB.powerUser);
powerMemoryIntensiveSimulation.getSolverTaskDescription().setTimeoutDisabled(true);

DispatcherTestUtils.insertOrUpdateStatus(simulationDB);
Assertions.assertThrows(RuntimeException.class,
Expand All @@ -156,6 +157,12 @@ public void stateShouldTransitionToFailure() throws SQLException, DataAccessExce
Assertions.assertTrue(jobStatus.getSchedulerStatus().isFailed(), "Memory size too large");
Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isFailed(), "Failed because of memory size.");

DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, MockSimulationDB.powerUser, SimulationJobStatus.SchedulerStatus.WAITING, simulationDB);
stateMachine.onDispatch(powerMemoryIntensiveSimulation, getLatestJobSubmission(), simulationDB, testMessageSession);
jobStatus = getLatestJobSubmission();
Assertions.assertTrue(jobStatus.getSchedulerStatus().isFailed(), "Memory size too large");
Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isFailed(), "Failed because of memory size.");

DispatcherTestUtils.insertOrUpdateStatus(simulationDB);
statusMessage = stateMachine.onStopRequest(DispatcherTestUtils.bob, getLatestJobSubmission(), simulationDB, testMessageSession);
Assertions.assertTrue(statusMessage.getSimulationJobStatus().getSchedulerStatus().isFailed(), "Stopping as another user.");
Expand Down Expand Up @@ -192,8 +199,8 @@ public void stateShouldTransitionToDispatched() throws SQLException, DataAccessE
Assertions.assertTrue(jobStatus.getSchedulerStatus().isDispatched());
Assertions.assertTrue(getClientTopicMessage().getSchedulerStatus().isDispatched());

DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, MockSimulationDB.specialUser, SimulationJobStatus.SchedulerStatus.WAITING, simulationDB);
simulation = DispatcherTestUtils.createMockSimulation(900, 900, 900, MockSimulationDB.specialUser);
DispatcherTestUtils.insertOrUpdateStatus(simKey, jobIndex, taskID, MockSimulationDB.powerUser, SimulationJobStatus.SchedulerStatus.WAITING, simulationDB);
simulation = DispatcherTestUtils.createMockSimulation(900, 900, 900, MockSimulationDB.powerUser);
simulation.getSolverTaskDescription().setTimeoutDisabled(true);
stateMachine.onDispatch(simulation, getLatestJobSubmission(), simulationDB, testMessageSession);
jobStatus = getLatestJobSubmission();
Expand Down

0 comments on commit acd44a5

Please sign in to comment.