cache number of actions in the MDP-family quotient

randriu · Dec 8, 2023 · a9d7fb9 · a9d7fb9
1 parent b39a6e3
commit a9d7fb9
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 9 deletions.
diff --git a/paynt/quotient/mdp_family.py b/paynt/quotient/mdp_family.py
@@ -62,6 +62,8 @@ def __init__(self, quotient_mdp, coloring, specification):
 
         self.design_space = paynt.quotient.holes.DesignSpace(coloring.holes)
 
+        # number of distinct actions in the quotient
+        self.num_actions = None
         # a list of action labels
         self.action_labels = None
         # for each choice of the quotient, the executed action
@@ -72,14 +74,10 @@ def __init__(self, quotient_mdp, coloring, specification):
         self.state_to_actions = None
 
         self.action_labels,self.choice_to_action = MdpFamilyQuotientContainer.extract_choice_labels(self.quotient_mdp)
+        self.num_actions = len(self.action_labels)
         self.state_action_choices = MdpFamilyQuotientContainer.map_state_action_to_choices(
             self.quotient_mdp, self.num_actions, self.choice_to_action)
         self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices)
-
-
-    @property
-    def num_actions(self):
-        return len(self.action_labels)
 
     def empty_policy(self):
         return self.empty_scheduler()

diff --git a/paynt/synthesizer/policy_tree.py b/paynt/synthesizer/policy_tree.py
@@ -438,11 +438,15 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
         self.quotient.build(family)
         mdp_family_result = MdpFamilyResult()
 
-        if family.size <= 8 and False:
-            policy_is_unique, unsat_mdps, sat_mdps, sat_policies = self.synthesize_policy_for_family(family, prop)
+        if family.size <= 4 and False:
+            policy_is_unique, unsat_mdps, sat_mdps, sat_policies = self.synthesize_policy_for_family(family, prop, iteration_limit=100)
             if policy_is_unique:
-                policy = merge_policies(sat_policies)
-                assert policy is not None
+                policy = sat_policies
+                mdp_family_result.policy = policy
+                mdp_family_result.policy_source = "policy search"
+                return mdp_family_result
+
+
 
         if False and reference_policy is not None:
             # try reference policy
@@ -764,6 +768,10 @@ def synthesize_policy_tree(self, family):
         policy_tree_leaves = [policy_tree.root]
         while policy_tree_leaves:
 
+            # gi = self.stat.iterations_game
+            # if gi is not None and gi > 500:
+            #     return None
+
             policy_tree_node = policy_tree_leaves.pop(-1)
             family = policy_tree_node.family
             result = self.verify_family(family,game_solver,prop,reference_policy)