Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix rom dataobj #2051

Open
wants to merge 7 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ravenframework/Models/Dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ def _inputToInternal(self,dataIN):
localInput[entry] = []
value = dataSet.isel({dataIN.sampleTag: rlz})[entry].values
localInput[entry].append(value)
localInput['_indexMap'] = dict((k, v) for k, v in dataIN.getDimensions().items() if v)
#Now if an OutputPlaceHolder is used it is removed, this happens when the input data is not representing is internally manufactured
if 'OutputPlaceHolder' in dataIN.getVars('output'):
localInput.pop('OutputPlaceHolder') # this remove the counter from the inputs to be placed among the outputs
Expand Down
68 changes: 46 additions & 22 deletions ravenframework/Models/Model.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,29 +226,47 @@ def _readMoreXML(self,xmlNode):
except KeyError:
self.raiseADebug("Failed in Node: "+str(xmlNode),verbostiy='silent')
self.raiseAnError(IOError,'missed subType for the model '+self.name)

for child in xmlNode:
if child.tag =='alias':
# the input would be <alias variable='internal_variable_name'>Material|Fuel|thermal_conductivity</alias>
if 'variable' in child.attrib.keys():
if 'type' in child.attrib.keys():
if child.attrib['type'].lower() not in ['input','output']:
self.raiseAnError(IOError,'the type of alias can be either "input" or "output". Got '+child.attrib['type'].lower())
aliasType = child.attrib['type'].lower().strip()
self.raiseAnError(IOError, 'the type of alias can be either "input" or "output". Got '+child.attrib['type'].lower())
aliasType = child.attrib['type'].lower().strip()
complementAliasType = 'output' if aliasType == 'input' else 'input'
else:
self.raiseAnError(IOError,'not found the attribute "type" in the definition of one of the alias for model '+str(self.name) +' of type '+self.type)
self.raiseAnError(IOError, f'The attribute "type" was not found for an alias in {self.type} model "{self.name}"!')

varFramework, varModel = child.attrib['variable'], child.text.strip()
if varFramework in self.alias[aliasType].keys():
self.raiseAnError(IOError,' The alias for variable ' +varFramework+' has been already inputted in model '+str(self.name) +' of type '+self.type)
self.raiseAnError(
IOError,
f'The alias for variable "{varFramework}" already exists in {self.type} model "{self.name}"!'
)

if varModel in self.alias[aliasType].values():
self.raiseAnError(IOError,' The alias ' +varModel+' has been already used for another variable in model '+str(self.name) +' of type '+self.type)
if varFramework in self.alias[complementAliasType].keys():
self.raiseAnError(IOError,' The alias for variable ' +varFramework+' has been already inputted ('+complementAliasType+') in model '+str(self.name) +' of type '+self.type)
if varModel in self.alias[complementAliasType].values():
self.raiseAnError(IOError,' The alias ' +varModel+' has been already used ('+complementAliasType+') for another variable in model '+str(self.name) +' of type '+self.type)
self.raiseAnError(
IOError,
f'The alias "{varModel}" has been already used for another variable in {self.type} model "{self.name}"!'
)

if varFramework in self.alias[complementAliasType]:
self.raiseAnError(
IOError,
f'The alias for variable "{varFramework}" has already been input as "{complementAliasType}" for {self.type} model "{self.name}"!'
)

for key, val in self.alias[complementAliasType].items():
if val == varModel:
self.raiseAnError(
IOError,
f'The alias "{varModel}" has already been used as "{complementAliasType}" for variable "{key}" for {self.type} model "{self.name}"!'
)
self.alias[aliasType][varFramework] = child.text.strip()
else:
self.raiseAnError(IOError,'not found the attribute "variable" in the definition of one of the alias for model '+str(self.name) +' of type '+self.type)
self.raiseAnError(IOError, f'The "variable" attribute is missing in one of the aliases for {self.type} model "{self.name}"!')
# read local information
self.localInputAndChecks(xmlNode)
#################
Expand Down Expand Up @@ -298,19 +316,25 @@ def _replaceVariablesNamesWithAliasSystem(self, sampledVars, aliasType='input',
listAliasType = [aliasType]
originalVariables = copy.deepcopy(sampledVars)
for aliasTyp in listAliasType:
for varFramework,varModel in self.alias[aliasTyp].items():
whichVar = varModel if fromModelToFramework else varFramework
notFound = 2**62
if type(originalVariables).__name__ != 'list':
found = sampledVars.pop(whichVar,[notFound])
if not np.array_equal(np.asarray(found), [notFound]):
if fromModelToFramework:
sampledVars[varFramework] = originalVariables[varModel]
else:
sampledVars[varModel] = originalVariables[varFramework]
for varFramework, varModel in self.alias[aliasTyp].items():
oldName = varModel if fromModelToFramework else varFramework
newName = varFramework if fromModelToFramework else varModel
if isinstance(originalVariables, dict):
# replace the old name with the new one, if present
if oldName in sampledVars:
value = sampledVars.pop(oldName)
sampledVars[newName] = value
elif isinstance(sampledVars, list):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realize originalVariables is a deepcopy of sampledVars, but it would be nice if this set of if isinstance checked on the same variable instead of the two different ones.

# options apparently are only a dict or list, so we're a list
# find the index of the aliased variable and replace it in the list
try:
index = sampledVars.index(oldName)
except ValueError:
# index wasn't found, no action necessary
continue
sampledVars[index] = newName
else:
if whichVar in sampledVars:
sampledVars[sampledVars.index(whichVar)] = varFramework if fromModelToFramework else varModel
self.raiseAnError(RuntimeError, 'Unrecognized alias list type:', type(sampledVars))
return originalVariables

def _handleInput(self, paramInput):
Expand Down
9 changes: 5 additions & 4 deletions ravenframework/Models/PostProcessors/TSACharacterizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,15 @@ def run(self, inp):
self.checkInput(inp)
inp = inp[0]
rlzs = []
targets = self.getTSATargets()
for r, rlz in enumerate(inp.sliceByIndex(inp.sampleTag)):
self.raiseADebug(f'Characterizing realization {r} ...')
self._tsaReset()
pivots = rlz[self.pivotParameterID]
targetVals = np.zeros((1, len(pivots), len(self.target))) # shape: (rlzs, time, targets)
for t, target in enumerate(self.target):
pivots = rlz[self._tsaPivotName]
targetVals = np.zeros((1, len(pivots), len(targets))) # shape: (rlzs, time, targets)
for t, target in enumerate(targets):
targetVals[0, :, t] = rlz[target]
self.trainTSASequential(targetVals)
self.trainTSASequential(targetVals, pivots=pivots)
rlz = self.getParamsAsVars()
rlzs.append(rlz)
return rlzs
Expand Down
171 changes: 104 additions & 67 deletions ravenframework/Models/ROM.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,26 +107,28 @@ def __init__(self):
@ Out, None
"""
super().__init__()
self.amITrained = False # boolean flag, is the ROM trained?
self.printTag = 'ROM MODEL' # label
self.cvInstanceName = None # the name of Cross Validation instance
self.cvInstance = None # Instance of provided cross validation
self._estimatorNameList = [] # the name list of estimator instance
self._estimatorList = [] # List of instances of provided estimators (ROM)
self._interfaceROM = None # Instance of provided ROM

self.pickled = False # True if ROM comes from a pickled rom
self.pivotParameterId = 'time' # The name of pivot parameter
self.canHandleDynamicData = False # check if the model can autonomously handle the time-dependency
# if not and time-dep data are passed in, a list of ROMs are constructed
self.isADynamicModel = False # True if the ROM is time-dependent
self.supervisedContainer = [] # List ROM instances
self.historySteps = [] # The history steps of pivot parameter
self.segment = False # True if segmenting/clustering/interpolating is requested
self.numThreads = 1 # number of threads used by the ROM
self.seed = None # seed information
self._segmentROM = None # segment rom instance
self._paramInput = None # the parsed xml input
self.amITrained = False # boolean flag, is the ROM trained?
self.printTag = 'ROM MODEL' # label
self.cvInstanceName = None # the name of Cross Validation instance
self.cvInstance = None # Instance of provided cross validation
self._estimatorNameList = [] # the name list of estimator instance
self._estimatorList = [] # List of instances of provided estimators (ROM)
self._interfaceROM = None # Instance of provided ROM
self.trainingSet = None # Data or instance from which this was trained

self.pickled = False # True if ROM comes from a pickled rom
self.pivotParameterId = 'time' # The name of pivot parameter
self.canHandleDynamicData = False # check if the model can autonomously handle the time-dependency
# if not and time-dep data are passed in, a list of ROMs are constructed

self.isADynamicModel = False # True if the ROM is time-dependent
self.supervisedContainer = [] # List ROM instances
self.historySteps = [] # The history steps of pivot parameter
self.segment = False # True if segmenting/clustering/interpolating is requested
self.numThreads = 1 # number of threads used by the ROM
self.seed = None # seed information
self._segmentROM = None # segment rom instance
self._paramInput = None # the parsed xml input

# for Clustered ROM
self.addAssemblerObject('Classifier', InputData.Quantity.zero_to_one)
Expand Down Expand Up @@ -347,66 +349,101 @@ def train(self,trainingSet):
@ In, trainingSet, dict or PointSet or HistorySet, data used to train the ROM; if an HistorySet is provided the a list of ROM is created in order to create a temporal-ROM
@ Out, None
"""
if type(trainingSet).__name__ == 'ROM':
self.trainingSet = copy.copy(trainingSet.trainingSet)
self.amITrained = copy.deepcopy(trainingSet.amITrained)
self.supervisedContainer = copy.deepcopy(trainingSet.supervisedContainer)
self.seed = trainingSet.seed
if isinstance(trainingSet, ROM):
self.trainFromInstance(trainingSet)
else:
# TODO: The following check may need to be moved to Dummy Class -- wangc 7/30/2018
if type(trainingSet).__name__ != 'dict' and trainingSet.type == 'HistorySet':
if type(trainingSet) != dict and trainingSet.type == 'HistorySet':
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

First, could you add a description to list all possible data structures for trainingSet?
Second, could you add checks for different data structures for trainingSet?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like a specific check for history set alignment, right? I don't know if we need to find out all the different approaches to ROMs within this PR, do we? This sounds like a bigger issue.

if not trainingSet.checkIndexAlignment(indexesToCheck=self.pivotParameterId):
self.raiseAnError(IOError, "The data provided by the data object", trainingSet.name, "is not synchonized!",
"The time-dependent ROM requires all the histories are synchonized!")
self.raiseAnError(
IOError,
f"The data provided by the data object {trainingSet.name}, is not synchonized!",
"The time-dependent ROM requires all the histories are synchonized!"
)
self.trainingSet = copy.copy(self._inputToInternal(trainingSet))
self._replaceVariablesNamesWithAliasSystem(self.trainingSet, 'inout', False)
if not self.supervisedContainer[0].requireJobHandler and 'jobHandler' in self.assemblerDict:
self.assemblerDict.pop("jobHandler")

# LEGACY SupervisedLearning (SVL) objects train on dictionaries/matrices
# New SVL can bypass the data manip and use the dataset directly
useDict = self.supervisedContainer[0].needsDictTraining
if useDict:
self.trainingSet = copy.copy(self._inputToInternal(trainingSet))
else:
self.trainingSet = trainingSet

self._replaceVariablesNamesWithAliasSystem(self.trainingSet, 'inout', False)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you check to see if this line works with your proposed data structure? In Model.py, this method only accept dict or list as input.

self.supervisedContainer[0].setAssembledObjects(self.assemblerDict)
# if training using ROMCollection, special treatment
if self.segment:

if not useDict or self.segment:
self.supervisedContainer[0].train(self.trainingSet)
else:
# not a collection # TODO move time-dependent snapshots to collection!
## time-dependent or static ROM?
if any(type(x).__name__ == 'list' for x in self.trainingSet.values()):
# we need to build a "time-dependent" ROM
self.isADynamicModel = True
if self.pivotParameterId not in list(self.trainingSet.keys()):
self.raiseAnError(IOError, 'The pivot parameter "{}" is not present in the training set.'.format(self.pivotParameterId),
'A time-dependent-like ROM cannot be created!')
if type(self.trainingSet[self.pivotParameterId]).__name__ != 'list':
self.raiseAnError(IOError, 'The pivot parameter "{}" is not a list.'.format(self.pivotParameterId),
" Are you sure it is part of the output space of the training set?")
self.historySteps = self.trainingSet.get(self.pivotParameterId)[-1]
if not len(self.historySteps):
self.raiseAnError(IOError, "the training set is empty!")
# intrinsically time-dependent or does the Gate need to handle it?
if self.canHandleDynamicData:
# the ROM is able to manage the time dependency on its own
self.supervisedContainer[-1].train(self.trainingSet)
else:
# TODO we can probably migrate this time-dependent handling to a type of ROMCollection!
# we need to construct a chain of ROMs
# the check on the number of time steps (consistency) is performed inside the historySnapShoots method
# get the time slices
newTrainingSet = mathUtils.historySnapShoots(self.trainingSet, len(self.historySteps))
assert type(newTrainingSet).__name__ == 'list'
# copy the original ROM
originalROM = self.supervisedContainer[0]
# start creating and training the time-dep ROMs
self.supervisedContainer = [copy.deepcopy(originalROM) for _ in range(len(self.historySteps))]
# train
for ts in range(len(self.historySteps)):
self.supervisedContainer[ts].train(newTrainingSet[ts])
# if a static ROM ...
else:
#self._replaceVariablesNamesWithAliasSystem(self.trainingSet, 'inout', False)
self.supervisedContainer[0].train(self.trainingSet)
# END if ROMCollection
self.trainLegacy()
self.amITrained = True

def trainFromInstance(self, rom):
  """
    Inherits the trained state of another, already-trained ROM instance
    instead of training from data.
    @ In, rom, ROM, trained ROM instance whose state should be adopted
    @ Out, None
  """
  # seed and trained flag are taken over directly / by deep copy
  self.seed = rom.seed
  self.amITrained = copy.deepcopy(rom.amITrained)
  # the trained learners must be deep-copied so the two ROMs evolve independently
  self.supervisedContainer = copy.deepcopy(rom.supervisedContainer)
  # the training data only needs a shallow copy (data itself is shared)
  self.trainingSet = copy.copy(rom.trainingSet)

def trainLegacy(self):
  """
    Train SLVs in the legacy style, with data manipulations and dynamic behavior checking
    Assumes self.trainingSet is a dict mapping variable names to value arrays/lists
    (the legacy SVL training format) — TODO confirm for non-dict training sets.
    @ In, None
    @ Out, None
  """
  ## time-dependent or static ROM?
  # any list-valued entry indicates history (time-dependent) data
  if any(isinstance(x, list) for x in self.trainingSet.values()):
    # we need to build a "time-dependent" ROM
    self.isADynamicModel = True

    # the pivot parameter (e.g. time) must be part of the training data
    if self.pivotParameterId not in list(self.trainingSet.keys()):
      self.raiseAnError(
          IOError,
          f'The pivot parameter "{self.pivotParameterId}" is not present in the training set. ',
          'A time-dependent-like ROM cannot be created!'
      )

    # the pivot values must be history-like (a list), not a scalar input
    if type(self.trainingSet[self.pivotParameterId]).__name__ != 'list':
      self.raiseAnError(
          IOError,
          f'The pivot parameter "{self.pivotParameterId}" is not a list. ',
          'Are you sure it is part of the output space of the training set?'
      )

    # take the pivot values of the last realization as the reference history steps
    self.historySteps = self.trainingSet.get(self.pivotParameterId)[-1]

    if not len(self.historySteps):
      self.raiseAnError(IOError, "the training set is empty!")

    # intrinsically time-dependent or does the Gate need to handle it?
    if self.canHandleDynamicData:
      # the ROM is able to manage the time dependency on its own
      # '_indexMap' is injected into the training dict by _inputToInternal when
      # dimensional data is present — presumably absent for point data, hence the default
      self.supervisedContainer[-1].train(self.trainingSet, indexMap=self.trainingSet.get('_indexMap', None))
    else:
      # TODO we can probably migrate this time-dependent handling to a type of ROMCollection!
      # we need to construct a chain of ROMs
      # the check on the number of time steps (consistency) is performed inside the historySnapShoots method
      # get the time slices
      newTrainingSet = mathUtils.historySnapShoots(self.trainingSet, len(self.historySteps))
      assert type(newTrainingSet).__name__ == 'list'
      # copy the original ROM
      originalROM = self.supervisedContainer[0]
      # start creating and training the time-dep ROMs: one trained SVL per history step
      self.supervisedContainer = [copy.deepcopy(originalROM) for _ in range(len(self.historySteps))]
      for ts in range(len(self.historySteps)):
        self.supervisedContainer[ts].train(newTrainingSet[ts])
  else:
    # static (non-time-dependent) ROM: a single SVL trains on the whole set
    self.supervisedContainer[0].train(self.trainingSet)


def confidence(self,request,target = None):
"""
This is to get a value that is inversely proportional to the confidence that we have
Expand Down
2 changes: 1 addition & 1 deletion ravenframework/SupervisedLearning/KerasBase.py
Original file line number Diff line number Diff line change
Expand Up @@ -2174,7 +2174,7 @@ def writeXML(self, writeTo, targets=None, skip=None):
writeTo.addScalar('Loss',"Training",' '.join([str(elm) for elm in self._romHistory.history['loss']]))
writeTo.addScalar('Loss',"Testing",' '.join([str(elm) for elm in self._romHistory.history['val_loss']]))

def train(self,tdict):
def train(self, tdict, indexMap=None):
"""
Method to perform the training of the deep neural network algorithm
NB.the KerasBase object is committed to convert the dictionary that is passed (in), into the local format
Expand Down
Loading