Skip to content

Commit

Permalink
compute Q-values wrt. arbitrary families
Browse files Browse the repository at this point in the history
  • Loading branch information
Roman Andriushchenko committed Dec 19, 2023
1 parent 0061568 commit b99807c
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/buildtest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
strategy:
matrix:
buildType:
- {name: "Release", imageName : "randriu/paynt", dockerTag: "temp", buildArgs: "BUILD_TYPE=Release", setupArgs: "", stormCommit : "dc7960b8f0222793b591f3d6489e2f6c7da1278f"}
- {name: "Release", imageName : "randriu/paynt", dockerTag: "temp", buildArgs: "BUILD_TYPE=Release", setupArgs: "", stormCommit : "de718be210dc6bef4b4e4507436a46786b812145"}
fail-fast: false
steps:
- name: Git clone
Expand Down
32 changes: 32 additions & 0 deletions paynt/quotient/pomdp_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,38 @@ def build_dtmc_sketch(self, fsc):
return dtmc_sketch


def compute_qvalues_for_product_submdp(self, product_submdp : paynt.quotient.models.MDP):
'''
Given an MDP obtained after applying FSC to a family of POMDPs, compute for each state s, (reachable)
memory node n, and action a, the Q-value Q((s,n),a).
:note it is assumed that a randomized FSC was used
:note it is assumed the provided DTMC sketch is the one obtained after the last unfolding, i.e. no other DTMC
sketch was constructed afterwards
:return a dictionary mapping (s,n,a) to Q((s,n),a)
'''
assert isinstance(self.product_pomdp_fsc, stormpy.synthesis.ProductPomdpRandomizedFsc), \
"to compute Q-values, unfolder for randomized FSC must have been used"

# model check
prop = self.get_property()
result = product_submdp.model_check_property(prop)
product_state_sub_to_value = result.result.get_values()

# map states of a sub-MDP to the states of the quotient-MDP to the state-memory pairs of the quotient POMDP
# map states values to the resulting map
product_state_to_state_memory_action = self.product_pomdp_fsc.product_state_to_state_memory_action.copy()
state_memory_action_to_value = {}
invalid_action = self.num_actions
for product_state_sub in range(product_submdp.model.nr_states):
product_state = product_submdp.quotient_state_map[product_state_sub]
state,memory_action = product_state_to_state_memory_action[product_state]
memory,action = memory_action
if action == invalid_action:
continue
value = product_state_sub_to_value[product_state_sub]
state_memory_action_to_value[(state,memory,action)] = value
return state_memory_action_to_value

def compute_qvalues_for_fsc(self, dtmc_sketch):
'''
Given a quotient MDP obtained after applying FSC to a family of POMDPs, compute for each state s, (reachable)
Expand Down
45 changes: 26 additions & 19 deletions paynt_pomdp_sketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,26 +77,33 @@ def investigate_hole_assignment(pomdp_sketch, hole_assignment):

# investigate this hole assignment and return an FSC
fsc = investigate_hole_assignment(pomdp_sketch, hole_assignment)

# apply this FSC to a POMDP sketch, obtaining a DTMC sketch
dtmc_sketch = pomdp_sketch.build_dtmc_sketch(fsc)
qvalues = pomdp_sketch.compute_qvalues_for_fsc(dtmc_sketch)

# to each singleton environment, assign a value corresponding to the specification satisfiability
synthesizer = paynt.synthesizer.synthesizer_onebyone.SynthesizerOneByOne(dtmc_sketch)
family_to_value = synthesizer.evaluate(keep_value_only=True, print_stats=False)

# pick the worst family
import numpy
values = numpy.array([value for family,value in family_to_value])
if dtmc_sketch.get_property().minimizing:
worst_index = values.argmax()
else:
worst_index = values.argmin()

worst_family,worst_value = family_to_value[worst_index]
print("the worst family has value {}, printing it below:".format(worst_value))
print(worst_family)

## Q-values computation
if False:
# apply this FSC to a POMDP sketch, obtaining a DTMC sketch
subfamily = dtmc_sketch.design_space
dtmc_sketch.build(subfamily)
qvalues = pomdp_sketch.compute_qvalues_for_product_submdp(subfamily.mdp)


## evaluation
if False:
# to each singleton environment, assign a value corresponding to the specification satisfiability
synthesizer = paynt.synthesizer.synthesizer_onebyone.SynthesizerOneByOne(dtmc_sketch)
family_to_value = synthesizer.evaluate(keep_value_only=True, print_stats=False)

# pick the worst family
import numpy
values = numpy.array([value for family,value in family_to_value])
if dtmc_sketch.get_property().minimizing:
worst_index = values.argmax()
else:
worst_index = values.argmin()

worst_family,worst_value = family_to_value[worst_index]
print("the worst family has value {}, printing it below:".format(worst_value))
print(worst_family)

if profiling:
profiler.disable()
Expand Down

0 comments on commit b99807c

Please sign in to comment.