move randomized abstraction building to Stormpy
Roman Andriushchenko committed Dec 6, 2023
1 parent 2b53cf0 commit fefb966
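
The randomized abstraction (which, for each state and action, averages the transition probabilities and state-action rewards uniformly over all choice variants of that action) is no longer built in Python by MdpFamilyQuotientContainer; its caller in policy_tree.py now delegates to Stormpy. A minimal sketch of the call-site change, assuming the stormpy.synthesis.randomize_action_variant binding mirrors the interface of the removed static method (MDP plus state-to-action-to-choices map in, randomized MDP plus choice-to-action map out):

    # before: abstraction built in Python by the quotient container
    model, choice_to_action = MdpFamilyQuotientContainer.randomize_action_variant(mdp, state_action_choices)
    # after: the same construction delegated to Stormpy (assumed binding, see diff below)
    model, choice_to_action = stormpy.synthesis.randomize_action_variant(mdp, state_action_choices)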
Showing 2 changed files with 20 additions and 81 deletions.
62 changes: 1 addition & 61 deletions paynt/quotient/mdp_family.py
@@ -66,64 +66,7 @@ def compute_choice_destinations(mdp):
choice_destinations.append(destinations)
return choice_destinations

@staticmethod
def randomize_action_variant(mdp, state_action_choices):
'''
Given an MDP having multiple variants of actions, create an MDP in which this variant selection is randomized.
'''

components = stormpy.storage.SparseModelComponents()
# copy state labeling
state_labeling = stormpy.storage.StateLabeling(mdp.nr_states)
for label in mdp.labeling.get_labels():
state_labeling.add_label(label)
state_labeling.set_states(label, mdp.labeling.get_states(label))
components.state_labeling = state_labeling

# build transition matrix and reward models
reward_vectors = {name:[] for name in mdp.reward_models}
builder = stormpy.storage.SparseMatrixBuilder(has_custom_row_grouping=True)
tm = mdp.transition_matrix
num_rows = 0
choice_to_action = []
for state in range(mdp.nr_states):
builder.new_row_group(num_rows)
for action,choices in enumerate(state_action_choices[state]):
if not choices:
continue

# new choice
choice_to_action.append(action)

# handle transition matrix
num_choices = len(choices)
dst_prob = collections.defaultdict(int)
for choice in choices:
for entry in tm.get_row(choice):
dst_prob[entry.column] += entry.value()/num_choices

for dst,prob in dst_prob.items():
builder.add_next_value(num_rows,dst,prob)
num_rows += 1

# handle reward models
for name,reward_model in mdp.reward_models.items():
reward_value = 0
for choice in choices:
reward_value += reward_model.get_state_action_reward(choice)
reward_value /= num_choices
reward_vectors[name].append(reward_value)

components.transition_matrix = builder.build()
components.reward_models = {}
for name,state_action_reward in reward_vectors.items():
components.reward_models[name] = stormpy.SparseRewardModel(optional_state_action_reward_vector=state_action_reward)
model = stormpy.storage.SparseMdp(components)

return model,choice_to_action



def __init__(self, quotient_mdp, coloring, specification):
super().__init__(quotient_mdp = quotient_mdp, coloring = coloring, specification = specification)

@@ -134,10 +77,7 @@ def __init__(self, quotient_mdp, coloring, specification):
self.quotient_mdp, self.num_actions, self.choice_to_action)
self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices)
self.choice_destinations = MdpFamilyQuotientContainer.compute_choice_destinations(self.quotient_mdp)

self.randomized_abstraction = MdpFamilyQuotientContainer.randomize_action_variant(
self.quotient_mdp, self.state_action_choices)



@property
def num_actions(self):
39 changes: 19 additions & 20 deletions paynt/synthesizer/policy_tree.py
@@ -333,6 +333,11 @@ def postprocess(self, quotient, prop):

class SynthesizerPolicyTree(paynt.synthesizer.synthesizer.Synthesizer):

# if True, the game abstraction will be used; if False, the randomized abstraction will be used
use_game_abstraction = True
# if True, then the game scheduler will be used for splitting (incompatible with randomized abstraction)
use_optimistic_splitting = True

@property
def method_name(self):
return "AR (policy tree)"
@@ -400,7 +405,7 @@ def solve_randomized_abstraction(self, family, prop):
action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]]
choice_to_action.append(action)
state_action_choices = self.quotient.map_state_action_to_choices(family.mdp.model,self.quotient.num_actions,choice_to_action)
model,choice_to_action = self.quotient.randomize_action_variant(family.mdp.model, state_action_choices)
model,choice_to_action = stormpy.synthesis.randomize_action_variant(family.mdp.model, state_action_choices)
assert family.mdp.model.nr_states == model.nr_states

result = stormpy.synthesis.verify_mdp(paynt.quotient.models.MarkovChain.environment,model,prop.formula,True)
@@ -421,7 +426,7 @@ def solve_randomized_abstraction(self, family, prop):
return policy,policy_sat



def verify_family(self, family, game_solver, prop, reference_policy=None):
self.quotient.build(family)
mdp_family_result = MdpFamilyResult()
@@ -440,17 +445,7 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
mdp_family_result.policy_source = "singleton"
return mdp_family_result

if False:
randomized_policy,randomized_sat = self.solve_randomized_abstraction(family,prop)
if randomized_sat:
randomized_policy_sat = self.verify_policy(family,prop,randomized_policy)
if randomized_policy_sat:
mdp_family_result.policy = randomized_policy
mdp_family_result.policy_source = "randomized abstraction"
return mdp_family_result

if True:
# game abstraction
if SynthesizerPolicyTree.use_game_abstraction:
game_policy,game_sat = self.solve_game_abstraction(family,prop,game_solver)
if game_sat:
game_policy_sat = self.verify_policy(family,prop,game_policy)
Expand All @@ -460,8 +455,14 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
return mdp_family_result
else:
print("game YES but nor forall family of size ", family.size)


else:
randomized_policy,randomized_sat = self.solve_randomized_abstraction(family,prop)
if randomized_sat:
randomized_policy_sat = self.verify_policy(family,prop,randomized_policy)
if randomized_policy_sat:
mdp_family_result.policy = randomized_policy
mdp_family_result.policy_source = "randomized abstraction"
return mdp_family_result

# solve primary-primary direction for the family
# logger.debug("solving primary-primary direction...")
@@ -474,12 +475,10 @@


# undecided: choose scheduler choices to be used for splitting
if False:
# optimistic splitting
if SynthesizerPolicyTree.use_optimistic_splitting:
scheduler_choices = game_solver.solution_reachable_choices
state_values = game_solver.solution_state_values
else:
# pessimistic splitting
scheduler_choices = stormpy.BitVector(self.quotient.quotient_mdp.nr_choices,False)
state_values = [0] * self.quotient.quotient_mdp.nr_states
for state in range(family.mdp.states):
@@ -579,7 +578,7 @@ def synthesize_policy_tree(self, family):
self.last_splitter = -1
prop = self.quotient.specification.constraints[0]
game_solver = self.quotient.build_game_abstraction_solver(prop)
game_solver.enable_profiling(True)
# game_solver.enable_profiling(True)
policy_tree = PolicyTree(family)

reference_policy = None
@@ -610,7 +609,7 @@
policy_tree_node.split(result.splitter,suboptions,subfamilies)
policy_tree_leaves = policy_tree_leaves + policy_tree_node.child_nodes

game_solver.print_profiling()
# game_solver.print_profiling()
policy_tree.double_check_all_families(self.quotient, prop)
policy_tree.print_stats()
policy_tree.postprocess(self.quotient, prop)
