diff --git a/prism/src/param/ValueComputer.java b/prism/src/param/ValueComputer.java index 0962c4d6..2cdfcfa2 100644 --- a/prism/src/param/ValueComputer.java +++ b/prism/src/param/ValueComputer.java @@ -32,6 +32,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map.Entry; +import common.IterableBitSet; import common.IterableStateSet; import prism.PrismComponent; import prism.PrismException; @@ -357,7 +358,7 @@ final class ValueComputer extends PrismComponent return resultCacheEntry; } - Scheduler computeOptConcreteReachScheduler(Point point, ParamModel model, StateValues b1, StateValues b2, boolean min, ParamRewardStruct rew) + Scheduler computeOptConcreteReachScheduler(Point point, ParamModel model, StateValues b1, StateValues b2, boolean min, ParamRewardStruct rew) throws PrismException { ParamModel concrete = model.instantiate(point); ParamRewardStruct rewConcrete = null; @@ -370,7 +371,7 @@ final class ValueComputer extends PrismComponent return scheduler; } scheduler = new Scheduler(concrete); - precomputeZero(concrete, scheduler, b1, b2, rew, min); + precomputeScheduler(concrete, scheduler, b1, b2, rew, min); boolean changed = true; while (changed) { MutablePMC pmc = buildAlterablePMCForReach(concrete, b1, b2, scheduler, rew); @@ -456,47 +457,81 @@ final class ValueComputer extends PrismComponent } /** - * Precomputation for policy iteration. - * Sets decisions of {@code sched} such that states which have a minimal - * reachability probability or accumulated reward of zero do already - * have a minimal value of zero if using this schedulers. For states with - * minimal value larger than zero, this scheduler needs not be minimising. + * Perform precomputation for policy iteration. + *
+ * For Pmin, if possible, for states with Pmin[ b1 U b2 ] = 0, generate zero scheduler. + *
+ * For Rmin, generate a proper scheduler, i.e., with P^sched[ b1 U b2 ] = 1 + * to get proper convergence in policy iteration. + * Assumes that states with Pmax[ b1 U b2 ] < 1 have been filtered beforehand + * and are not contained in b1 or b2. + *
* In case of maximal accumulated reward or probabilities, currently does - * nothing. If {@code rew == null}, performs reachability, otherwise + * nothing. + *
+ * If {@code rew == null}, performs reachability, otherwise * performs accumulated reward computation. {@code b2} is the target set * for reachability probabilities or accumulated rewards. {@code b1} is - * either constantly {@code} or describes the left side of an until + * either constantly {@code true} or describes the left side of an until + * property. + */ + private void precomputeScheduler(ParamModel mdp, Scheduler sched, StateValues b1, StateValues b2, ParamRewardStruct rew, boolean min) throws PrismException + { + if (rew == null) { + // probability case + if (min) { + precomputePmin(mdp, sched, b1, b2); + } + } else { + if (min) + precomputeRminProperScheduler(mdp, sched, b1, b2); + // TODO: Would be nice to generate proper zero scheduler in the situations + // where that is possible + } + } + + /** + * Precomputation for policy iteration, Rmin. + * For Rmin, generate a proper scheduler, i.e., with P^sched[ b1 U b2 ] = 1 + * to get proper convergence in policy iteration. + * Assumes that states with Pmax[ b1 U b2 ] < 1 have been filtered beforehand + * and are not contained in b1 or b2. + */ + private void precomputeRminProperScheduler(ParamModel mdp, Scheduler sched, StateValues b1, StateValues b2) throws PrismException + { + explicit.MDPModelChecker mcExplicit = new explicit.MDPModelChecker(this); + mcExplicit.setSilentPrecomputations(true); + int[] strat = new int[mdp.getNumStates()]; + BitSet b1bs = b1.toBitSet(); + // use prob1e strategy generation from explicit model checker + mcExplicit.prob1(mdp, b1bs, b2.toBitSet(), false, strat); + + for (int s : IterableBitSet.getSetBits(b1bs)) { + assert(strat[s] >= 0); + sched.setChoice(s, mdp.stateBegin(s) + strat[s]); + } + } + + /** + * Precomputation for policy iteration, Pmin. + * Sets decisions of {@code sched} such that states which have a minimal + * reachability probability of zero do already have a minimal value of zero + * if using this schedulers. For states with minimal value larger than zero, + * this scheduler needs not be minimising. + * {@code b2} is the target set for reachability probabilities. {@code b1} is + * either constantly {@code true} or describes the left side of an until * property. * * @param mdp Markov model to precompute for * @param sched scheduler to precompute * @param b1 left side of U property, or constant true * @param b2 right side of U property, or of reachability reward - * @param rew reward structure - * @param min true iff minimising */ - private void precomputeZero(ParamModel mdp, Scheduler sched, StateValues b1, StateValues b2, ParamRewardStruct rew, boolean min) + private void precomputePmin(ParamModel mdp, Scheduler sched, StateValues b1, StateValues b2) { - if (!min) { - return; - } BitSet ones = new BitSet(mdp.getNumStates()); for (int state = 0; state < mdp.getNumStates(); state++) { - if (rew == null) { - ones.set(state, b2.getStateValueAsBoolean(state)); - } else { - if (!b2.getStateValueAsBoolean(state)) { - boolean avoidReward = false; - for (int choice = mdp.stateBegin(state); choice < mdp.stateEnd(state); choice++) { - if (mdp.sumLeaving(choice).equals(functionFactory.getZero())) { - sched.setChoice(state, choice); - avoidReward = true; - break; - } - } - ones.set(state, !avoidReward); - } - } + ones.set(state, b2.getStateValueAsBoolean(state)); } boolean changed = true; while (changed) {