From ced819f47d09b3cb3896986fc8511c5d271a28bd Mon Sep 17 00:00:00 2001
From: Filip Macak <filip.macak1999@gmail.com>
Date: Tue, 5 Dec 2023 17:36:31 +0100
Subject: [PATCH 1/4] Action coloring construction in family of MDPs

---
 paynt/quotient/quotient.py       | 17 +++++++
 paynt/synthesizer/policy_tree.py | 76 +++++++++++++++++++++++++++++++-
 2 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index 98655707d..6a38e9a2f 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -91,6 +91,23 @@ def build(self, family):
         # prepare to discard designs
         self.discarded = 0
 
+
+    def build_with_coloring(self, family, coloring):
+        ''' Construct the quotient MDP for the family. '''
+
+        # select actions compatible with the family and restrict the quotient
+        hole_selected_actions,selected_actions,selected_actions_bv = coloring.select_actions(family)
+        family.mdp = self.build_from_choice_mask(selected_actions_bv)
+        family.mdp.design_space = family
+
+        # cash restriction information
+        family.hole_selected_actions = hole_selected_actions
+        family.selected_actions = selected_actions
+        family.selected_actions_bv = selected_actions_bv
+
+        # prepare to discard designs
+        self.discarded = 0
+
     
     @staticmethod
     def mdp_to_dtmc(mdp):
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index e952eb3fa..19b6f7d79 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -1,6 +1,7 @@
 import stormpy.synthesis
 
 import paynt.quotient.holes
+import paynt.quotient.coloring
 import paynt.quotient.models
 import paynt.synthesizer.synthesizer
 
@@ -525,7 +526,7 @@ def choose_splitter(self, family, prop, scheduler_choices, state_values, hole_se
         assert splitter is not None
         return splitter
     
-    
+
     def split(self, family, prop, scheduler_choices, state_values, hole_selection):
         
         splitter = self.choose_splitter(family,prop,scheduler_choices,state_values,hole_selection)
@@ -558,6 +559,75 @@ def split(self, family, prop, scheduler_choices, state_values, hole_selection):
             subfamilies.append(subfamily)
         
         return splitter,suboptions,subfamilies
+
+
+    def create_action_coloring(self, quotient_mdp):
+
+        holes = paynt.quotient.holes.Holes()
+        action_to_hole_options = []
+        for state in quotient_mdp.states:
+
+            state_actions = self.quotient.state_to_actions[int(state)]
+            if len(state_actions) <= 1:
+                for action in range(quotient_mdp.get_nr_available_actions(state)):
+                    action_to_hole_options.append({})
+                continue
+
+            name = f'state_{state}'
+            options = list(range(len(state_actions)))
+            option_labels = [self.quotient.action_labels[action] for action in state_actions]
+            hole = paynt.quotient.holes.Hole(name, options, option_labels)
+            holes.append(hole)
+
+            for action in range(quotient_mdp.get_nr_available_actions(state)):
+                choice = quotient_mdp.get_choice_index(state, action)
+                choice_index = -1
+                for index, action_list in enumerate(list(self.quotient.state_action_choices[int(state)])):
+                    if choice in action_list:
+                        choice_index = index
+                        break
+                assert choice_index != -1
+
+                hole_options = {len(holes)-1: state_actions.index(choice_index)}
+                action_to_hole_options.append(hole_options)
+
+        coloring = paynt.quotient.coloring.Coloring(quotient_mdp, holes, action_to_hole_options)
+
+        return coloring
+    
+
+    def synthesize_policy_for_tree_node(self, family_tree_node, prop):
+        family = family_tree_node.family
+        assert family.mdp is not None
+
+        all_mdp_families = []
+        action_coloring = self.create_action_coloring(family.mdp.model)
+        action_family = paynt.quotient.holes.DesignSpace(action_coloring.holes)
+
+        for hole_assignment in family.all_combinations():
+            subfamily = family.copy()
+            for hole_index, hole_option in enumerate(hole_assignment):
+                subfamily.assume_hole_options(hole_index, [hole_option])
+            all_mdp_families.append(subfamily)
+
+            # TODO check if all MDPs SAT (what to do if not?)
+            # self.quotient.build(subfamily)
+            # primary_result = subfamily.mdp.model_check_property(prop)
+            # self.stat.iteration_mdp(subfamily.mdp.states)
+
+            # if primary_result.sat == False:
+            #     return False
+
+        # self.quotient.build(all_mdp_families[0])
+        # primary_result = all_mdp_families[0].mdp.model_check_property(prop)
+        # self.stat.iteration_mdp(all_mdp_families[0].mdp.states)
+        #print(all_mdp_families[0].mdp.quotient_state_map)
+        #print(primary_result.result.scheduler)
+
+        
+
+
+
         
 
 
@@ -569,6 +639,10 @@ def synthesize_policy_tree(self, family):
         game_solver.enable_profiling(True)
         policy_tree = PolicyTree(family)
 
+        self.quotient.build(policy_tree.root.family)
+        self.synthesize_policy_for_tree_node(policy_tree.root, prop)
+        exit()
+
         reference_policy = None
         policy_tree_leaves = [policy_tree.root]
         while policy_tree_leaves:

From 40d1efafeb4acd36ce85d94048d00e48b5de6c0e Mon Sep 17 00:00:00 2001
From: Filip Macak <filip.macak1999@gmail.com>
Date: Wed, 6 Dec 2023 00:51:37 +0100
Subject: [PATCH 2/4] Added mdp building from two colorings

---
 paynt/quotient/quotient.py       | 26 +++++++++++++++++++-------
 paynt/synthesizer/policy_tree.py | 15 +++++++++++++++
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index 6a38e9a2f..e7475ffe2 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -92,18 +92,30 @@ def build(self, family):
         self.discarded = 0
 
 
-    def build_with_coloring(self, family, coloring):
+    def build_with_second_coloring(self, main_coloring, main_family, alt_family):
         ''' Construct the quotient MDP for the family. '''
 
         # select actions compatible with the family and restrict the quotient
-        hole_selected_actions,selected_actions,selected_actions_bv = coloring.select_actions(family)
-        family.mdp = self.build_from_choice_mask(selected_actions_bv)
-        family.mdp.design_space = family
+        alt_hole_selected_actions,alt_selected_actions,alt_selected_actions_bv = self.coloring.select_actions(alt_family)
+        main_hole_selected_actions,main_selected_actions,main_selected_actions_bv = main_coloring.select_actions(main_family)
+
+        # print(alt_hole_selected_actions)
+        # print(main_hole_selected_actions)
+        # print()
+        # print(alt_selected_actions)
+        # print(main_selected_actions)
+        # print()
+        # print(alt_selected_actions_bv)
+        # print(main_selected_actions_bv)
+
+        selected_actions_bv = main_selected_actions_bv.__and__(alt_selected_actions_bv)
+        main_family.mdp = self.build_from_choice_mask(selected_actions_bv)
+        main_family.mdp.design_space = main_family
 
         # cash restriction information
-        family.hole_selected_actions = hole_selected_actions
-        family.selected_actions = selected_actions
-        family.selected_actions_bv = selected_actions_bv
+        main_family.hole_selected_actions = main_hole_selected_actions
+        main_family.selected_actions = main_selected_actions
+        main_family.selected_actions_bv = selected_actions_bv
 
         # prepare to discard designs
         self.discarded = 0
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index 19b6f7d79..ad6ae533c 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -624,6 +624,21 @@ def synthesize_policy_for_tree_node(self, family_tree_node, prop):
         #print(all_mdp_families[0].mdp.quotient_state_map)
         #print(primary_result.result.scheduler)
 
+
+        action_family_stack = [action_family]
+
+        while action_family_stack:
+
+            current_action_family = action_family_stack.pop(-1)
+
+            for mdp_subfamily in all_mdp_families:
+                self.quotient.build_with_second_coloring(action_coloring, current_action_family, mdp_subfamily)
+
+                primary_result = current_action_family.mdp.model_check_property(prop)
+                print(primary_result)
+
+            exit()
+
         
 
 

From 7ab99521479057f61ba0c56342c1d218c91e737b Mon Sep 17 00:00:00 2001
From: Filip Macak <filip.macak1999@gmail.com>
Date: Tue, 5 Dec 2023 22:11:39 +0100
Subject: [PATCH 3/4] Finished first version of one policy for all mdps method

---
 paynt/quotient/quotient.py       | 22 ++++++-
 paynt/synthesizer/policy_tree.py | 99 +++++++++++++++++++++++++-------
 2 files changed, 99 insertions(+), 22 deletions(-)

diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index e7475ffe2..0ca9874fe 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -158,7 +158,27 @@ def scheduler_selection(self, mdp, scheduler):
                 selection[hole_index].add(option)
         selection = [list(options) for options in selection]
 
-        return selection    
+        return selection
+    
+    def scheduler_selection_with_coloring(self, mdp, scheduler, coloring):
+        ''' Get hole options involved in the scheduler selection. '''
+        assert scheduler.memoryless and scheduler.deterministic
+        
+        # construct DTMC that corresponds to this scheduler and filter reachable states/choices
+        choices = scheduler.compute_action_support(mdp.model.nondeterministic_choice_indices)
+        dtmc,_,choice_map = self.restrict_mdp(mdp.model, choices)
+        choices = [ choice_map[state] for state in range(dtmc.nr_states) ]
+        
+        # map relevant choices to hole options
+        selection = [set() for hole_index in mdp.design_space.hole_indices]
+        for choice in choices:
+            global_choice = mdp.quotient_choice_map[choice]
+            choice_options = coloring.action_to_hole_options[global_choice]
+            for hole_index,option in choice_options.items():
+                selection[hole_index].add(option)
+        selection = [list(options) for options in selection]
+
+        return selection
 
     
     @staticmethod
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index ad6ae533c..6e5c4a5ee 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -455,7 +455,7 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
                 else:
                     print("game YES but nor forall family of size ", family.size)
 
-        
+
         
         # solve primary-primary direction for the family
         # logger.debug("solving primary-primary direction...")
@@ -596,27 +596,36 @@ def create_action_coloring(self, quotient_mdp):
         return coloring
     
 
-    def synthesize_policy_for_tree_node(self, family_tree_node, prop):
-        family = family_tree_node.family
-        assert family.mdp is not None
+    def update_scores(self, score_lists, selection):
+        for hole, score_list in score_lists.items():
+            for choice in selection[hole]:
+                if choice not in score_list:
+                    score_list.append(choice)
+        
+    
 
-        all_mdp_families = []
-        action_coloring = self.create_action_coloring(family.mdp.model)
+    def synthesize_policy_for_tree_node(self, family, prop):
+        sat_mdp_families = []
+        action_coloring = self.create_action_coloring(self.quotient.quotient_mdp)
         action_family = paynt.quotient.holes.DesignSpace(action_coloring.holes)
 
+        unsat_mdp_families = []
+
         for hole_assignment in family.all_combinations():
             subfamily = family.copy()
             for hole_index, hole_option in enumerate(hole_assignment):
                 subfamily.assume_hole_options(hole_index, [hole_option])
-            all_mdp_families.append(subfamily)
 
             # TODO check if all MDPs SAT (what to do if not?)
-            # self.quotient.build(subfamily)
-            # primary_result = subfamily.mdp.model_check_property(prop)
-            # self.stat.iteration_mdp(subfamily.mdp.states)
+            self.quotient.build(subfamily)
+            primary_result = subfamily.mdp.model_check_property(prop)
+            self.stat.iteration_mdp(subfamily.mdp.states)
+
+            if primary_result.sat == False:
+                unsat_mdp_families.append(subfamily)
+                continue
 
-            # if primary_result.sat == False:
-            #     return False
+            sat_mdp_families.append(subfamily)
 
         # self.quotient.build(all_mdp_families[0])
         # primary_result = all_mdp_families[0].mdp.model_check_property(prop)
@@ -624,25 +633,69 @@ def synthesize_policy_for_tree_node(self, family_tree_node, prop):
         #print(all_mdp_families[0].mdp.quotient_state_map)
         #print(primary_result.result.scheduler)
 
-
+        
         action_family_stack = [action_family]
 
         while action_family_stack:
 
             current_action_family = action_family_stack.pop(-1)
+            current_results = []
 
-            for mdp_subfamily in all_mdp_families:
-                self.quotient.build_with_second_coloring(action_coloring, current_action_family, mdp_subfamily)
+            score_lists = {hole:[] for hole in current_action_family.hole_indices if len(current_action_family[hole].options) > 1}
+
+            for mdp_subfamily in sat_mdp_families:
+                self.quotient.build_with_second_coloring(action_coloring, current_action_family, mdp_subfamily) # maybe copy to new family?
 
                 primary_result = current_action_family.mdp.model_check_property(prop)
-                print(primary_result)
+                self.stat.iteration_mdp(current_action_family.mdp.states)
 
-            exit()
+                if primary_result.sat == False:
+                    current_results.append(False)
+                    break
 
-        
+                current_results.append(primary_result)
+                selection = self.quotient.scheduler_selection_with_coloring(current_action_family.mdp, primary_result.result.scheduler, action_coloring)
+                self.update_scores(score_lists, selection)
 
+                scores = {hole:len(score_list) for hole, score_list in score_lists.items()}
+                
+                splitters = self.quotient.holes_with_max_score(scores)
+                splitter = splitters[0]
 
+                if scores[splitter] > 1:
+                    break
+            else:
+                return True, unsat_mdp_families
 
+            if False in current_results:
+                continue
+
+            used_options = score_lists[splitter]
+            core_suboptions = [[option] for option in used_options]
+            other_suboptions = [option for option in current_action_family[splitter].options if option not in used_options]
+            if other_suboptions:
+                other_suboptions = [other_suboptions]
+            else:
+                other_suboptions = []
+            suboptions = other_suboptions + core_suboptions # DFS solves core first
+
+            subfamilies = []
+            current_action_family.splitter = splitter
+            new_design_space = current_action_family.copy()
+            for suboption in suboptions:
+                subholes = new_design_space.subholes(splitter, suboption)
+                action_subfamily = paynt.quotient.holes.DesignSpace(subholes)
+                action_subfamily.assume_hole_options(splitter, suboption)
+                subfamilies.append(action_subfamily)
+            
+            # for subf in subfamilies:
+            #     print(subf)
+            # print()
+
+            action_family_stack = action_family_stack + subfamilies
+            #print(len(action_family_stack))
+
+        return False, unsat_mdp_families
         
 
 
@@ -654,9 +707,10 @@ def synthesize_policy_tree(self, family):
         game_solver.enable_profiling(True)
         policy_tree = PolicyTree(family)
 
-        self.quotient.build(policy_tree.root.family)
-        self.synthesize_policy_for_tree_node(policy_tree.root, prop)
-        exit()
+        # self.quotient.build(policy_tree.root.family)
+        # policy_exists, _ = self.synthesize_policy_for_tree_node(policy_tree.root.family, prop)
+        # print(policy_exists)
+        # exit()
 
         reference_policy = None
         policy_tree_leaves = [policy_tree.root]
@@ -669,6 +723,9 @@ def synthesize_policy_tree(self, family):
             policy_tree_node.policy = result.policy
             policy_tree_node.policy_source = result.policy_source
 
+            policy_exists, _ = self.synthesize_policy_for_tree_node(family, prop)
+            print(policy_exists)
+
             if result.policy == False:
                 reference_policy = None
                 self.explore(family)

From 68f0a909430edc883680792707279e72fc1290df Mon Sep 17 00:00:00 2001
From: Filip Macak <filip.macak1999@gmail.com>
Date: Wed, 6 Dec 2023 18:39:55 +0100
Subject: [PATCH 4/4] Algorithm for finding one policy for a family of MDPs

---
 paynt/quotient/quotient.py       |  15 ++--
 paynt/synthesizer/policy_tree.py | 117 ++++++++++++++++++++++---------
 2 files changed, 89 insertions(+), 43 deletions(-)

diff --git a/paynt/quotient/quotient.py b/paynt/quotient/quotient.py
index 0ca9874fe..dd6a46c02 100644
--- a/paynt/quotient/quotient.py
+++ b/paynt/quotient/quotient.py
@@ -92,25 +92,18 @@ def build(self, family):
         self.discarded = 0
 
 
-    def build_with_second_coloring(self, main_coloring, main_family, alt_family):
+    def build_with_second_coloring(self, family, main_coloring, main_family):
         ''' Construct the quotient MDP for the family. '''
 
         # select actions compatible with the family and restrict the quotient
-        alt_hole_selected_actions,alt_selected_actions,alt_selected_actions_bv = self.coloring.select_actions(alt_family)
+        alt_hole_selected_actions,alt_selected_actions,alt_selected_actions_bv = self.coloring.select_actions(family)
         main_hole_selected_actions,main_selected_actions,main_selected_actions_bv = main_coloring.select_actions(main_family)
 
-        # print(alt_hole_selected_actions)
-        # print(main_hole_selected_actions)
-        # print()
-        # print(alt_selected_actions)
-        # print(main_selected_actions)
-        # print()
-        # print(alt_selected_actions_bv)
-        # print(main_selected_actions_bv)
-
         selected_actions_bv = main_selected_actions_bv.__and__(alt_selected_actions_bv)
         main_family.mdp = self.build_from_choice_mask(selected_actions_bv)
         main_family.mdp.design_space = main_family
+        family.mdp = self.build_from_choice_mask(selected_actions_bv)
+        family.mdp.design_space = family
 
         # cash restriction information
         main_family.hole_selected_actions = main_hole_selected_actions
diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
index 604c5e8d6..e6c804d4c 100644
--- a/paynt/synthesizer/policy_tree.py
+++ b/paynt/synthesizer/policy_tree.py
@@ -614,58 +614,101 @@ def update_scores(self, score_lists, selection):
             for choice in selection[hole]:
                 if choice not in score_list:
                     score_list.append(choice)
+
+
+    def create_policy(self, scheduler, family):
+        choice_to_action = []
+        for choice in range(family.mdp.choices):
+            action = self.quotient.choice_to_action[family.mdp.quotient_choice_map[choice]]
+            choice_to_action.append(action)
+            
+        policy = self.quotient.empty_policy()
+        for state in range(family.mdp.model.nr_states):
+            state_choice = scheduler.get_choice(state).get_deterministic_choice()
+            choice = family.mdp.model.transition_matrix.get_row_group_start(state) + state_choice
+            action = choice_to_action[choice]
+            quotient_state = family.mdp.quotient_state_map[state]
+            policy[quotient_state] = action
+
+        return policy
         
     
-
-    def synthesize_policy_for_tree_node(self, family, prop):
+    # synthesize one policy for family of MDPs (if such policy exists)
+    # set all_sat=True if all MDPs in the family are sat
+    # returns - True, unsat_families, sat_families, policy
+    #         - False, unsat_families, sat_families, sat_policies
+    def synthesize_policy_for_tree_node(self, family, prop, all_sat=False, iteration_limit=0):
         sat_mdp_families = []
+        sat_mdp_policies = []
+        unsat_mdp_families = []
+
+        # coloring for MDP choices
+        # TODO move this outside of the function since this needs to be created only once or create coloring on family.mdp
         action_coloring = self.create_action_coloring(self.quotient.quotient_mdp)
         action_family = paynt.quotient.holes.DesignSpace(action_coloring.holes)
 
-        unsat_mdp_families = []
-
+        # create MDP subfamilies
         for hole_assignment in family.all_combinations():
             subfamily = family.copy()
             for hole_index, hole_option in enumerate(hole_assignment):
                 subfamily.assume_hole_options(hole_index, [hole_option])
 
-            # TODO check if all MDPs SAT (what to do if not?)
-            self.quotient.build(subfamily)
-            primary_result = subfamily.mdp.model_check_property(prop)
-            self.stat.iteration_mdp(subfamily.mdp.states)
-
-            if primary_result.sat == False:
-                unsat_mdp_families.append(subfamily)
-                continue
-
-            sat_mdp_families.append(subfamily)
-
-        # self.quotient.build(all_mdp_families[0])
-        # primary_result = all_mdp_families[0].mdp.model_check_property(prop)
-        # self.stat.iteration_mdp(all_mdp_families[0].mdp.states)
-        #print(all_mdp_families[0].mdp.quotient_state_map)
-        #print(primary_result.result.scheduler)
-
+            # find out which mdps are sat and unsat
+            if not all_sat:
+                self.quotient.build(subfamily)
+                primary_result = subfamily.mdp.model_check_property(prop)
+                self.stat.iteration_mdp(subfamily.mdp.states)
+    
+                if primary_result.sat == False:
+                    unsat_mdp_families.append(subfamily)
+                    continue
+    
+                sat_mdp_families.append(subfamily)
+                policy = self.create_policy(primary_result.result.scheduler, subfamily)
+                sat_mdp_policies.append(policy)
+            else:
+                sat_mdp_families.append(subfamily)
+     
+        # no sat mdps
+        if len(sat_mdp_families) == 0:
+            return False, unsat_mdp_families, sat_mdp_families, None
         
+        if len(sat_mdp_policies) == 0:
+            sat_mdp_policies = [None for _ in sat_mdp_families]
+     
         action_family_stack = [action_family]
+        iter = 0
 
+        # AR for policies
         while action_family_stack:
 
+            if iteration_limit>0 and iter>iteration_limit:
+                break
+
             current_action_family = action_family_stack.pop(-1)
             current_results = []
 
             score_lists = {hole:[] for hole in current_action_family.hole_indices if len(current_action_family[hole].options) > 1}
 
-            for mdp_subfamily in sat_mdp_families:
-                self.quotient.build_with_second_coloring(action_coloring, current_action_family, mdp_subfamily) # maybe copy to new family?
+            # try to find controller inconsistency across the MDPs
+            # if the controllers are consistent, return True
+            for index, mdp_subfamily in enumerate(sat_mdp_families):
+                self.quotient.build_with_second_coloring(mdp_subfamily, action_coloring, current_action_family) # maybe copy to new family?
 
                 primary_result = current_action_family.mdp.model_check_property(prop)
                 self.stat.iteration_mdp(current_action_family.mdp.states)
 
+                # discard the family as soon as one MDP is unsat
                 if primary_result.sat == False:
                     current_results.append(False)
                     break
 
+                # add policy if current mdp doesn't have one yet
+                # TODO maybe this can be done after some number of controllers are consistent?
+                if sat_mdp_policies[index] == None:
+                    policy = self.create_policy(primary_result.result.scheduler, mdp_subfamily)
+                    sat_mdp_policies[index] = policy
+
                 current_results.append(primary_result)
                 selection = self.quotient.scheduler_selection_with_coloring(current_action_family.mdp, primary_result.result.scheduler, action_coloring)
                 self.update_scores(score_lists, selection)
@@ -675,10 +718,14 @@ def synthesize_policy_for_tree_node(self, family, prop):
                 splitters = self.quotient.holes_with_max_score(scores)
                 splitter = splitters[0]
 
+                # refinement as soon as the first inconsistency is found
                 if scores[splitter] > 1:
                     break
             else:
-                return True, unsat_mdp_families
+                for index, (result, family) in enumerate(zip(current_results, sat_mdp_families)):
+                    policy = self.create_policy(result.result.scheduler, family)
+                    sat_mdp_policies[index] = policy
+                return True, unsat_mdp_families, sat_mdp_families, sat_mdp_policies
 
             if False in current_results:
                 continue
@@ -700,15 +747,21 @@ def synthesize_policy_for_tree_node(self, family, prop):
                 action_subfamily = paynt.quotient.holes.DesignSpace(subholes)
                 action_subfamily.assume_hole_options(splitter, suboption)
                 subfamilies.append(action_subfamily)
-            
-            # for subf in subfamilies:
-            #     print(subf)
-            # print()
 
             action_family_stack = action_family_stack + subfamilies
-            #print(len(action_family_stack))
 
-        return False, unsat_mdp_families
+            iter += 1
+
+        # compute policies for the sat mdps that were never analysed
+        mdps_without_policy = [index for index, policy in enumerate(sat_mdp_policies) if policy is None]
+        for mdp_index in mdps_without_policy:
+            self.quotient.build(sat_mdp_families[mdp_index])
+            primary_result = sat_mdp_families[mdp_index].mdp.model_check_property(prop)
+            self.stat.iteration_mdp(sat_mdp_families[mdp_index].mdp.states)
+            policy = self.create_policy(primary_result.result.scheduler, sat_mdp_families[mdp_index])
+            sat_mdp_policies[mdp_index] = policy
+
+        return False, unsat_mdp_families, sat_mdp_families, sat_mdp_policies
         
 
 
@@ -736,8 +789,8 @@ def synthesize_policy_tree(self, family):
             policy_tree_node.policy = result.policy
             policy_tree_node.policy_source = result.policy_source
 
-            policy_exists, _ = self.synthesize_policy_for_tree_node(family, prop)
-            print(policy_exists)
+            # if family.size < 8:
+            #     policy_exists, unsat, sat, policy = self.synthesize_policy_for_tree_node(family, prop)
 
             if result.policy == False:
                 reference_policy = None