Skip to content

Commit

Permalink
cache number of actions in the MDP-family quotient
Browse files Browse the repository at this point in the history
  • Loading branch information
Roman Andriushchenko committed Dec 8, 2023
1 parent b39a6e3 commit a9d7fb9
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 9 deletions.
8 changes: 3 additions & 5 deletions paynt/quotient/mdp_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ def __init__(self, quotient_mdp, coloring, specification):

self.design_space = paynt.quotient.holes.DesignSpace(coloring.holes)

# number of distinct actions in the quotient
self.num_actions = None
# a list of action labels
self.action_labels = None
# for each choice of the quotient, the executed action
Expand All @@ -72,14 +74,10 @@ def __init__(self, quotient_mdp, coloring, specification):
self.state_to_actions = None

self.action_labels,self.choice_to_action = MdpFamilyQuotientContainer.extract_choice_labels(self.quotient_mdp)
self.num_actions = len(self.action_labels)
self.state_action_choices = MdpFamilyQuotientContainer.map_state_action_to_choices(
self.quotient_mdp, self.num_actions, self.choice_to_action)
self.state_to_actions = MdpFamilyQuotientContainer.map_state_to_available_actions(self.state_action_choices)


@property
def num_actions(self):
return len(self.action_labels)

def empty_policy(self):
return self.empty_scheduler()
Expand Down
16 changes: 12 additions & 4 deletions paynt/synthesizer/policy_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,11 +438,15 @@ def verify_family(self, family, game_solver, prop, reference_policy=None):
self.quotient.build(family)
mdp_family_result = MdpFamilyResult()

if family.size <= 8 and False:
policy_is_unique, unsat_mdps, sat_mdps, sat_policies = self.synthesize_policy_for_family(family, prop)
if family.size <= 4 and False:
policy_is_unique, unsat_mdps, sat_mdps, sat_policies = self.synthesize_policy_for_family(family, prop, iteration_limit=100)
if policy_is_unique:
policy = merge_policies(sat_policies)
assert policy is not None
policy = sat_policies
mdp_family_result.policy = policy
mdp_family_result.policy_source = "policy search"
return mdp_family_result



if False and reference_policy is not None:
# try reference policy
Expand Down Expand Up @@ -764,6 +768,10 @@ def synthesize_policy_tree(self, family):
policy_tree_leaves = [policy_tree.root]
while policy_tree_leaves:

# gi = self.stat.iterations_game
# if gi is not None and gi > 500:
# return None

policy_tree_node = policy_tree_leaves.pop(-1)
family = policy_tree_node.family
result = self.verify_family(family,game_solver,prop,reference_policy)
Expand Down

0 comments on commit a9d7fb9

Please sign in to comment.