From ced819f47d09b3cb3896986fc8511c5d271a28bd Mon Sep 17 00:00:00 2001 From: Filip Macak Date: Tue, 5 Dec 2023 17:36:31 +0100 Subject: [PATCH 1/4] Action coloring construction in family of MDPs --- paynt/quotient/quotient.py | 17 +++++++ paynt/synthesizer/policy_tree.py | 76 +++++++++++++++++++++++++++++++- 2 files changed, 92 insertions(+), 1 deletion(-) diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 98655707d..6a38e9a2f 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -91,6 +91,23 @@ def build(self, family): # prepare to discard designs self.discarded = 0 + + def build_with_coloring(self, family, coloring): + ''' Construct the quotient MDP for the family. ''' + + # select actions compatible with the family and restrict the quotient + hole_selected_actions,selected_actions,selected_actions_bv = coloring.select_actions(family) + family.mdp = self.build_from_choice_mask(selected_actions_bv) + family.mdp.design_space = family + + # cash restriction information + family.hole_selected_actions = hole_selected_actions + family.selected_actions = selected_actions + family.selected_actions_bv = selected_actions_bv + + # prepare to discard designs + self.discarded = 0 + @staticmethod def mdp_to_dtmc(mdp): diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index e952eb3fa..19b6f7d79 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -1,6 +1,7 @@ import stormpy.synthesis import paynt.quotient.holes +import paynt.quotient.coloring import paynt.quotient.models import paynt.synthesizer.synthesizer @@ -525,7 +526,7 @@ def choose_splitter(self, family, prop, scheduler_choices, state_values, hole_se assert splitter is not None return splitter - + def split(self, family, prop, scheduler_choices, state_values, hole_selection): splitter = self.choose_splitter(family,prop,scheduler_choices,state_values,hole_selection) @@ -558,6 +559,75 @@ def split(self, family, prop, scheduler_choices, state_values, hole_selection): subfamilies.append(subfamily) return splitter,suboptions,subfamilies + + + def create_action_coloring(self, quotient_mdp): + + holes = paynt.quotient.holes.Holes() + action_to_hole_options = [] + for state in quotient_mdp.states: + + state_actions = self.quotient.state_to_actions[int(state)] + if len(state_actions) <= 1: + for action in range(quotient_mdp.get_nr_available_actions(state)): + action_to_hole_options.append({}) + continue + + name = f'state_{state}' + options = list(range(len(state_actions))) + option_labels = [self.quotient.action_labels[action] for action in state_actions] + hole = paynt.quotient.holes.Hole(name, options, option_labels) + holes.append(hole) + + for action in range(quotient_mdp.get_nr_available_actions(state)): + choice = quotient_mdp.get_choice_index(state, action) + choice_index = -1 + for index, action_list in enumerate(list(self.quotient.state_action_choices[int(state)])): + if choice in action_list: + choice_index = index + break + assert choice_index != -1 + + hole_options = {len(holes)-1: state_actions.index(choice_index)} + action_to_hole_options.append(hole_options) + + coloring = paynt.quotient.coloring.Coloring(quotient_mdp, holes, action_to_hole_options) + + return coloring + + + def synthesize_policy_for_tree_node(self, family_tree_node, prop): + family = family_tree_node.family + assert family.mdp is not None + + all_mdp_families = [] + action_coloring = self.create_action_coloring(family.mdp.model) + action_family = paynt.quotient.holes.DesignSpace(action_coloring.holes) + + for hole_assignment in family.all_combinations(): + subfamily = family.copy() + for hole_index, hole_option in enumerate(hole_assignment): + subfamily.assume_hole_options(hole_index, [hole_option]) + all_mdp_families.append(subfamily) + + # TODO check if all MDPs SAT (what to do if not?) + # self.quotient.build(subfamily) + # primary_result = subfamily.mdp.model_check_property(prop) + # self.stat.iteration_mdp(subfamily.mdp.states) + + # if primary_result.sat == False: + # return False + + # self.quotient.build(all_mdp_families[0]) + # primary_result = all_mdp_families[0].mdp.model_check_property(prop) + # self.stat.iteration_mdp(all_mdp_families[0].mdp.states) + #print(all_mdp_families[0].mdp.quotient_state_map) + #print(primary_result.result.scheduler) + + + + + @@ -569,6 +639,10 @@ def synthesize_policy_tree(self, family): game_solver.enable_profiling(True) policy_tree = PolicyTree(family) + self.quotient.build(policy_tree.root.family) + self.synthesize_policy_for_tree_node(policy_tree.root, prop) + exit() + reference_policy = None policy_tree_leaves = [policy_tree.root] while policy_tree_leaves: From 40d1efafeb4acd36ce85d94048d00e48b5de6c0e Mon Sep 17 00:00:00 2001 From: Filip Macak Date: Wed, 6 Dec 2023 00:51:37 +0100 Subject: [PATCH 2/4] Added mdp building from two colorings --- paynt/quotient/quotient.py | 26 +++++++++++++++++++------- paynt/synthesizer/policy_tree.py | 15 +++++++++++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 6a38e9a2f..e7475ffe2 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -92,18 +92,30 @@ def build(self, family): self.discarded = 0 - def build_with_coloring(self, family, coloring): + def build_with_second_coloring(self, main_coloring, main_family, alt_family): ''' Construct the quotient MDP for the family. ''' # select actions compatible with the family and restrict the quotient - hole_selected_actions,selected_actions,selected_actions_bv = coloring.select_actions(family) - family.mdp = self.build_from_choice_mask(selected_actions_bv) - family.mdp.design_space = family + alt_hole_selected_actions,alt_selected_actions,alt_selected_actions_bv = self.coloring.select_actions(alt_family) + main_hole_selected_actions,main_selected_actions,main_selected_actions_bv = main_coloring.select_actions(main_family) + + # print(alt_hole_selected_actions) + # print(main_hole_selected_actions) + # print() + # print(alt_selected_actions) + # print(main_selected_actions) + # print() + # print(alt_selected_actions_bv) + # print(main_selected_actions_bv) + + selected_actions_bv = main_selected_actions_bv.__and__(alt_selected_actions_bv) + main_family.mdp = self.build_from_choice_mask(selected_actions_bv) + main_family.mdp.design_space = main_family # cash restriction information - family.hole_selected_actions = hole_selected_actions - family.selected_actions = selected_actions - family.selected_actions_bv = selected_actions_bv + main_family.hole_selected_actions = main_hole_selected_actions + main_family.selected_actions = main_selected_actions + main_family.selected_actions_bv = selected_actions_bv # prepare to discard designs self.discarded = 0 diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 19b6f7d79..ad6ae533c 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -624,6 +624,21 @@ def synthesize_policy_for_tree_node(self, family_tree_node, prop): #print(all_mdp_families[0].mdp.quotient_state_map) #print(primary_result.result.scheduler) + + action_family_stack = [action_family] + + while action_family_stack: + + current_action_family = action_family_stack.pop(-1) + + for mdp_subfamily in all_mdp_families: + self.quotient.build_with_second_coloring(action_coloring, current_action_family, mdp_subfamily) + + primary_result = current_action_family.mdp.model_check_property(prop) + print(primary_result) + + exit() + From 7ab99521479057f61ba0c56342c1d218c91e737b Mon Sep 17 00:00:00 2001 From: Filip Macak Date: Tue, 5 Dec 2023 22:11:39 +0100 Subject: [PATCH 3/4] Finished first version of one policy for all mdps method --- paynt/quotient/quotient.py | 22 ++++++- paynt/synthesizer/policy_tree.py | 99 +++++++++++++++++++++++++------- 2 files changed, 99 insertions(+), 22 deletions(-) diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index e7475ffe2..0ca9874fe 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -158,7 +158,27 @@ def scheduler_selection(self, mdp, scheduler): selection[hole_index].add(option) selection = [list(options) for options in selection] - return selection + return selection + + def scheduler_selection_with_coloring(self, mdp, scheduler, coloring): + ''' Get hole options involved in the scheduler selection. ''' + assert scheduler.memoryless and scheduler.deterministic + + # construct DTMC that corresponds to this scheduler and filter reachable states/choices + choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices) + dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices) + choices = [ choice_map[state] for state in range(dtmc.nr_states) ] + + # map relevant choices to hole options + selection = [set() for hole_index in mdp.design_space.hole_indices] + for choice in choices: + global_choice = mdp.quotient_choice_map[choice] + choice_options = coloring.action_to_hole_options[global_choice] + for hole_index,option in choice_options.items(): + selection[hole_index].add(option) + selection = [list(options) for options in selection] + + return selection @staticmethod diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index ad6ae533c..6e5c4a5ee 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -455,7 +455,7 @@ def verify_family(self, family, game_solver, prop, reference_policy=None): else: print("game YES but nor forall family of size ", family.size) - + # solve primary-primary direction for the family # logger.debug("solving primary-primary direction...") @@ -596,27 +596,36 @@ def create_action_coloring(self, quotient_mdp): return coloring - def synthesize_policy_for_tree_node(self, family_tree_node, prop): - family = family_tree_node.family - assert family.mdp is not None + def update_scores(self, score_lists, selection): + for hole, score_list in score_lists.items(): + for choice in selection[hole]: + if choice not in score_list: + score_list.append(choice) + + - all_mdp_families = [] - action_coloring = self.create_action_coloring(family.mdp.model) + def synthesize_policy_for_tree_node(self, family, prop): + sat_mdp_families = [] + action_coloring = self.create_action_coloring(self.quotient.quotient_mdp) action_family = paynt.quotient.holes.DesignSpace(action_coloring.holes) + unsat_mdp_families = [] + for hole_assignment in family.all_combinations(): subfamily = family.copy() for hole_index, hole_option in enumerate(hole_assignment): subfamily.assume_hole_options(hole_index, [hole_option]) - all_mdp_families.append(subfamily) # TODO check if all MDPs SAT (what to do if not?) - # self.quotient.build(subfamily) - # primary_result = subfamily.mdp.model_check_property(prop) - # self.stat.iteration_mdp(subfamily.mdp.states) + self.quotient.build(subfamily) + primary_result = subfamily.mdp.model_check_property(prop) + self.stat.iteration_mdp(subfamily.mdp.states) + + if primary_result.sat == False: + unsat_mdp_families.append(subfamily) + continue - # if primary_result.sat == False: - # return False + sat_mdp_families.append(subfamily) # self.quotient.build(all_mdp_families[0]) # primary_result = all_mdp_families[0].mdp.model_check_property(prop) @@ -624,25 +633,69 @@ def synthesize_policy_for_tree_node(self, family_tree_node, prop): #print(all_mdp_families[0].mdp.quotient_state_map) #print(primary_result.result.scheduler) - + action_family_stack = [action_family] while action_family_stack: current_action_family = action_family_stack.pop(-1) + current_results = [] - for mdp_subfamily in all_mdp_families: - self.quotient.build_with_second_coloring(action_coloring, current_action_family, mdp_subfamily) + score_lists = {hole:[] for hole in current_action_family.hole_indices if len(current_action_family[hole].options) > 1} + + for mdp_subfamily in sat_mdp_families: + self.quotient.build_with_second_coloring(action_coloring, current_action_family, mdp_subfamily) # maybe copy to new family? primary_result = current_action_family.mdp.model_check_property(prop) - print(primary_result) + self.stat.iteration_mdp(current_action_family.mdp.states) - exit() + if primary_result.sat == False: + current_results.append(False) + break - + current_results.append(primary_result) + selection = self.quotient.scheduler_selection_with_coloring(current_action_family.mdp, primary_result.result.scheduler, action_coloring) + self.update_scores(score_lists, selection) + scores = {hole:len(score_list) for hole, score_list in score_lists.items()} + + splitters = self.quotient.holes_with_max_score(scores) + splitter = splitters[0] + if scores[splitter] > 1: + break + else: + return True, unsat_mdp_families + if False in current_results: + continue + + used_options = score_lists[splitter] + core_suboptions = [[option] for option in used_options] + other_suboptions = [option for option in current_action_family[splitter].options if option not in used_options] + if other_suboptions: + other_suboptions = [other_suboptions] + else: + other_suboptions = [] + suboptions = other_suboptions + core_suboptions # DFS solves core first + + subfamilies = [] + current_action_family.splitter = splitter + new_design_space = current_action_family.copy() + for suboption in suboptions: + subholes = new_design_space.subholes(splitter, suboption) + action_subfamily = paynt.quotient.holes.DesignSpace(subholes) + action_subfamily.assume_hole_options(splitter, suboption) + subfamilies.append(action_subfamily) + + # for subf in subfamilies: + # print(subf) + # print() + + action_family_stack = action_family_stack + subfamilies + #print(len(action_family_stack)) + + return False, unsat_mdp_families @@ -654,9 +707,10 @@ def synthesize_policy_tree(self, family): game_solver.enable_profiling(True) policy_tree = PolicyTree(family) - self.quotient.build(policy_tree.root.family) - self.synthesize_policy_for_tree_node(policy_tree.root, prop) - exit() + # self.quotient.build(policy_tree.root.family) + # policy_exists, _ = self.synthesize_policy_for_tree_node(policy_tree.root.family, prop) + # print(policy_exists) + # exit() reference_policy = None policy_tree_leaves = [policy_tree.root] @@ -669,6 +723,9 @@ def synthesize_policy_tree(self, family): policy_tree_node.policy = result.policy policy_tree_node.policy_source = result.policy_source + policy_exists, _ = self.synthesize_policy_for_tree_node(family, prop) + print(policy_exists) + if result.policy == False: reference_policy = None self.explore(family) From 68f0a909430edc883680792707279e72fc1290df Mon Sep 17 00:00:00 2001 From: Filip Macak Date: Wed, 6 Dec 2023 18:39:55 +0100 Subject: [PATCH 4/4] Algorithm for finding one policy for a family of MDPs --- paynt/quotient/quotient.py | 15 ++-- paynt/synthesizer/policy_tree.py | 117 ++++++++++++++++++++++--------- 2 files changed, 89 insertions(+), 43 deletions(-) diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py index 0ca9874fe..dd6a46c02 100644 --- a/paynt/quotient/quotient.py +++ b/paynt/quotient/quotient.py @@ -92,25 +92,18 @@ def build(self, family): self.discarded = 0 - def build_with_second_coloring(self, main_coloring, main_family, alt_family): + def build_with_second_coloring(self, family, main_coloring, main_family): ''' Construct the quotient MDP for the family. ''' # select actions compatible with the family and restrict the quotient - alt_hole_selected_actions,alt_selected_actions,alt_selected_actions_bv = self.coloring.select_actions(alt_family) + alt_hole_selected_actions,alt_selected_actions,alt_selected_actions_bv = self.coloring.select_actions(family) main_hole_selected_actions,main_selected_actions,main_selected_actions_bv = main_coloring.select_actions(main_family) - # print(alt_hole_selected_actions) - # print(main_hole_selected_actions) - # print() - # print(alt_selected_actions) - # print(main_selected_actions) - # print() - # print(alt_selected_actions_bv) - # print(main_selected_actions_bv) - selected_actions_bv = main_selected_actions_bv.__and__(alt_selected_actions_bv) main_family.mdp = self.build_from_choice_mask(selected_actions_bv) main_family.mdp.design_space = main_family + family.mdp = self.build_from_choice_mask(selected_actions_bv) + family.mdp.design_space = family # cash restriction information main_family.hole_selected_actions = main_hole_selected_actions diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py index 604c5e8d6..e6c804d4c 100644 --- a/paynt/synthesizer/policy_tree.py +++ b/paynt/synthesizer/policy_tree.py @@ -614,58 +614,101 @@ def update_scores(self, score_lists, selection): for choice in selection[hole]: if choice not in score_list: score_list.append(choice) + + + def create_policy(self, scheduler, family): + choice_to_action = [] + for choice in range(family.mdp.choices): + action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]] + choice_to_action.append(action) + + policy = self.quotient.empty_policy() + for state in range(family.mdp.model.nr_states): + state_choice = scheduler.get_choice(state).get_deterministic_choice() + choice = family.mdp.model.transition_matrix.get_row_group_start(state) + state_choice + action = choice_to_action[choice] + quotient_state = family.mdp.quotient_state_map[state] + policy[quotient_state] = action + + return policy - - def synthesize_policy_for_tree_node(self, family, prop): + # synthesize one policy for family of MDPs (if such policy exists) + # set all_sat=True if all MDPs in the family are sat + # returns - True, unsat_families, sat_families, policy + # - False, unsat_families, sat_families, sat_policies + def synthesize_policy_for_tree_node(self, family, prop, all_sat=False, iteration_limit=0): sat_mdp_families = [] + sat_mdp_policies = [] + unsat_mdp_families = [] + + # coloring for MDP choices + # TODO move this outside of the function since this needs to be created only once or create coloring on family.mdp action_coloring = self.create_action_coloring(self.quotient.quotient_mdp) action_family = paynt.quotient.holes.DesignSpace(action_coloring.holes) - unsat_mdp_families = [] - + # create MDP subfamilies for hole_assignment in family.all_combinations(): subfamily = family.copy() for hole_index, hole_option in enumerate(hole_assignment): subfamily.assume_hole_options(hole_index, [hole_option]) - # TODO check if all MDPs SAT (what to do if not?) - self.quotient.build(subfamily) - primary_result = subfamily.mdp.model_check_property(prop) - self.stat.iteration_mdp(subfamily.mdp.states) - - if primary_result.sat == False: - unsat_mdp_families.append(subfamily) - continue - - sat_mdp_families.append(subfamily) - - # self.quotient.build(all_mdp_families[0]) - # primary_result = all_mdp_families[0].mdp.model_check_property(prop) - # self.stat.iteration_mdp(all_mdp_families[0].mdp.states) - #print(all_mdp_families[0].mdp.quotient_state_map) - #print(primary_result.result.scheduler) - + # find out which mdps are sat and unsat + if not all_sat: + self.quotient.build(subfamily) + primary_result = subfamily.mdp.model_check_property(prop) + self.stat.iteration_mdp(subfamily.mdp.states) + + if primary_result.sat == False: + unsat_mdp_families.append(subfamily) + continue + + sat_mdp_families.append(subfamily) + policy = self.create_policy(primary_result.result.scheduler, subfamily) + sat_mdp_policies.append(policy) + else: + sat_mdp_families.append(subfamily) + + # no sat mdps + if len(sat_mdp_families) == 0: + return False, unsat_mdp_families, sat_mdp_families, None + if len(sat_mdp_policies) == 0: + sat_mdp_policies = [None for _ in sat_mdp_families] + action_family_stack = [action_family] + iter = 0 + # AR for policies while action_family_stack: + if iteration_limit>0 and iter>iteration_limit: + break + current_action_family = action_family_stack.pop(-1) current_results = [] score_lists = {hole:[] for hole in current_action_family.hole_indices if len(current_action_family[hole].options) > 1} - for mdp_subfamily in sat_mdp_families: - self.quotient.build_with_second_coloring(action_coloring, current_action_family, mdp_subfamily) # maybe copy to new family? + # try to find controller inconsistency across the MDPs + # if the controllers are consistent, return True + for index, mdp_subfamily in enumerate(sat_mdp_families): + self.quotient.build_with_second_coloring(mdp_subfamily, action_coloring, current_action_family) # maybe copy to new family? primary_result = current_action_family.mdp.model_check_property(prop) self.stat.iteration_mdp(current_action_family.mdp.states) + # discard the family as soon as one MDP is unsat if primary_result.sat == False: current_results.append(False) break + # add policy if current mdp doesn't have one yet + # TODO maybe this can be done after some number of controllers are consistent? + if sat_mdp_policies[index] == None: + policy = self.create_policy(primary_result.result.scheduler, mdp_subfamily) + sat_mdp_policies[index] = policy + current_results.append(primary_result) selection = self.quotient.scheduler_selection_with_coloring(current_action_family.mdp, primary_result.result.scheduler, action_coloring) self.update_scores(score_lists, selection) @@ -675,10 +718,14 @@ def synthesize_policy_for_tree_node(self, family, prop): splitters = self.quotient.holes_with_max_score(scores) splitter = splitters[0] + # refinement as soon as the first inconsistency is found if scores[splitter] > 1: break else: - return True, unsat_mdp_families + for index, (result, family) in enumerate(zip(current_results, sat_mdp_families)): + policy = self.create_policy(result.result.scheduler, family) + sat_mdp_policies[index] = policy + return True, unsat_mdp_families, sat_mdp_families, sat_mdp_policies if False in current_results: continue @@ -700,15 +747,21 @@ def synthesize_policy_for_tree_node(self, family, prop): action_subfamily = paynt.quotient.holes.DesignSpace(subholes) action_subfamily.assume_hole_options(splitter, suboption) subfamilies.append(action_subfamily) - - # for subf in subfamilies: - # print(subf) - # print() action_family_stack = action_family_stack + subfamilies - #print(len(action_family_stack)) - return False, unsat_mdp_families + iter += 1 + + # compute policies for the sat mdps that were never analysed + mdps_without_policy = [index for index, policy in enumerate(sat_mdp_policies) if policy is None] + for mdp_index in mdps_without_policy: + self.quotient.build(sat_mdp_families[mdp_index]) + primary_result = sat_mdp_families[mdp_index].mdp.model_check_property(prop) + self.stat.iteration_mdp(sat_mdp_families[mdp_index].mdp.states) + policy = self.create_policy(primary_result.result.scheduler, sat_mdp_families[mdp_index]) + sat_mdp_policies[mdp_index] = policy + + return False, unsat_mdp_families, sat_mdp_families, sat_mdp_policies @@ -736,8 +789,8 @@ def synthesize_policy_tree(self, family): policy_tree_node.policy = result.policy policy_tree_node.policy_source = result.policy_source - policy_exists, _ = self.synthesize_policy_for_tree_node(family, prop) - print(policy_exists) + # if family.size < 8: + # policy_exists, unsat, sat, policy = self.synthesize_policy_for_tree_node(family, prop) if result.policy == False: reference_policy = None