//==============================================================================
//
// Copyright (c) 2002-
// Authors:
// * Dave Parker <david.parker@comlab.ox.ac.uk> (University of Oxford)
//
//------------------------------------------------------------------------------
//
// This file is part of PRISM.
//
// PRISM is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// PRISM is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with PRISM; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//==============================================================================
package explicit;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.PrimitiveIterator;
import java.util.Vector;
import common.IterableStateSet;
import common.StopWatch;
import parser.VarList;
import parser.ast.Declaration;
import parser.ast.DeclarationIntUnbounded;
import parser.ast.Expression;
import parser.ast.ExpressionTemporal;
import parser.ast.TemporalOperatorBound;
import prism.OptionsIntervalIteration;
import prism.Prism;
import prism.PrismComponent;
import prism.PrismDevNullLog;
import prism.PrismException;
import prism.PrismFileLog;
import prism.PrismLog;
import prism.PrismNotSupportedException;
import prism.PrismSettings;
import prism.PrismUtils;
import strat.MDStrategyArray;
import acceptance.AcceptanceReach;
import acceptance.AcceptanceType;
import common.BitSetAndQueue;
import common.BitSetTools;
import automata.DA;
import common.IntSet;
import common.IterableBitSet;
import explicit.IncomingChoiceRelation.Choice;
import explicit.modelviews.EquivalenceRelationInteger;
import explicit.modelviews.MDPDroppedAllChoices;
import explicit.modelviews.MDPEquiv;
import explicit.rewards.MCRewards;
import explicit.rewards.MCRewardsFromMDPRewards;
import explicit.rewards.MDPRewards;
import explicit.rewards.Rewards;
/**
* Explicit-state model checker for Markov decision processes (MDPs).
*/
public class MDPModelChecker extends ProbModelChecker
{
/**
* Create a new MDPModelChecker, inherit basic state from parent (unless null).
*/
public MDPModelChecker(PrismComponent parent) throws PrismException
{
super(parent);
// PRISM_FAIRNESS
if (settings != null && settings.getBoolean(PrismSettings.PRISM_FAIRNESS)) {
throw new PrismNotSupportedException("The explicit engine does not support model checking MDPs under fairness");
}
}
// Model checking functions
@Override
protected StateValues checkProbPathFormulaSimple(Model model, Expression expr, MinMax minMax, BitSet statesOfInterest) throws PrismException
{
// In the continuous-time case, defer to the standard handling, without special treatment for rewards
if (model.getModelType().continuousTime()) {
return super.checkProbPathFormulaSimple(model, expr, minMax, statesOfInterest);
}
expr = Expression.convertSimplePathFormulaToCanonicalForm(expr);
ExpressionTemporal exprTemp = Expression.getTemporalOperatorForSimplePathFormula(expr);
if (exprTemp.getBounds().hasRewardBounds() ||
exprTemp.getBounds().countTimeBoundsDiscrete() > 1) {
// We have reward bounds or multiple time / step bounds
// transform model and expression and recurse
List<TemporalOperatorBound> boundsToReplace = exprTemp.getBounds().getStepBoundsForDiscreteTime();
if (!boundsToReplace.isEmpty()) {
// exempt the first time bound; it is handled by the standard simple path formula procedure
boundsToReplace.remove(0);
}
boundsToReplace.addAll(exprTemp.getBounds().getRewardBounds());
ModelExpressionTransformation<MDP, MDP> transformed =
CounterTransformation.replaceBoundsWithCounters(this, (MDP) model, expr, boundsToReplace, statesOfInterest);
mainLog.println("\nPerforming actual calculations for\n");
mainLog.println("MDP: "+transformed.getTransformedModel().infoString());
mainLog.println("Formula: "+transformed.getTransformedExpression() +"\n");
// We can now delegate to ProbModelChecker.checkProbPathFormulaSimple as there is
// at most one time / step bound remaining
StateValues svTransformed = super.checkProbPathFormulaSimple(transformed.getTransformedModel(), transformed.getTransformedExpression(), minMax, transformed.getTransformedStatesOfInterest());
return transformed.projectToOriginalModel(svTransformed);
} else {
// We are fine, delegate to ProbModelChecker.checkProbPathFormulaSimple
return super.checkProbPathFormulaSimple(model, expr, minMax, statesOfInterest);
}
}
@Override
protected StateValues checkProbPathFormulaLTL(Model model, Expression expr, boolean qual, MinMax minMax, BitSet statesOfInterest) throws PrismException
{
LTLModelChecker mcLtl;
StateValues probsProduct, probs;
MDPModelChecker mcProduct;
LTLModelChecker.LTLProduct<MDP> product;
// For min probabilities, need to negate the formula
// (add parentheses to allow re-parsing if required)
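// (Pmin=? [ phi ] = 1 - Pmax=? [ !phi ]; the complementation is undone after the
// reachability computation below)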
if (minMax.isMin()) {
expr = Expression.Not(Expression.Parenth(expr.deepCopy()));
}
// For LTL model checking routines
mcLtl = new LTLModelChecker(this);
// Build product of MDP and automaton
AcceptanceType[] allowedAcceptance = {
AcceptanceType.REACH,
AcceptanceType.BUCHI,
AcceptanceType.RABIN,
AcceptanceType.GENERALIZED_RABIN,
AcceptanceType.STREETT
};
product = mcLtl.constructProductMDP(this, (MDP)model, expr, statesOfInterest, allowedAcceptance);
// Output product, if required
if (getExportProductTrans()) {
mainLog.println("\nExporting product transition matrix to file \"" + getExportProductTransFilename() + "\"...");
product.getProductModel().exportToPrismExplicitTra(getExportProductTransFilename());
}
if (getExportProductStates()) {
mainLog.println("\nExporting product state space to file \"" + getExportProductStatesFilename() + "\"...");
PrismFileLog out = new PrismFileLog(getExportProductStatesFilename());
VarList newVarList = (VarList) modulesFile.createVarList().clone();
String daVar = "_da";
while (newVarList.getIndex(daVar) != -1) {
daVar = "_" + daVar;
}
newVarList.addVar(0, new Declaration(daVar, new DeclarationIntUnbounded()), 1, null);
product.getProductModel().exportStates(Prism.EXPORT_PLAIN, newVarList, out);
out.close();
}
// Find accepting states + compute reachability probabilities
BitSet acc;
if (product.getAcceptance() instanceof AcceptanceReach) {
mainLog.println("\nSkipping accepting MEC computation since acceptance is defined via goal states...");
acc = ((AcceptanceReach)product.getAcceptance()).getGoalStates();
} else {
mainLog.println("\nFinding accepting MECs...");
acc = mcLtl.findAcceptingECStates(product.getProductModel(), product.getAcceptance());
}
mainLog.println("\nComputing reachability probabilities...");
mcProduct = new MDPModelChecker(this);
mcProduct.inheritSettings(this);
ModelCheckerResult res = mcProduct.computeReachProbs((MDP) product.getProductModel(), acc, false);
probsProduct = StateValues.createFromDoubleArray(res.soln, product.getProductModel());
// Subtract from 1 if we're model checking a negated formula for regular Pmin
if (minMax.isMin()) {
probsProduct.timesConstant(-1.0);
probsProduct.plusConstant(1.0);
}
// Output vector over product, if required
if (getExportProductVector()) {
mainLog.println("\nExporting product solution vector to file \"" + getExportProductVectorFilename() + "\"...");
PrismFileLog out = new PrismFileLog(getExportProductVectorFilename());
probsProduct.print(out, false, false, false, false);
out.close();
}
// Map probabilities back to the original model
probs = product.projectToOriginalModel(probsProduct);
probsProduct.clear();
return probs;
}
/**
* Compute rewards for a co-safe LTL reward operator.
*/
protected StateValues checkRewardCoSafeLTL(Model model, Rewards modelRewards, Expression expr, MinMax minMax, BitSet statesOfInterest) throws PrismException
{
LTLModelChecker mcLtl;
MDPRewards productRewards;
StateValues rewardsProduct, rewards;
MDPModelChecker mcProduct;
LTLModelChecker.LTLProduct<MDP> product;
// For LTL model checking routines
mcLtl = new LTLModelChecker(this);
// Model check maximal state formulas and construct DFA, with the special
// handling needed for cosafety reward translation
Vector<BitSet> labelBS = new Vector<BitSet>();
DA<BitSet, AcceptanceReach> da = mcLtl.constructDFAForCosafetyRewardLTL(this, model, expr, labelBS);
StopWatch timer = new StopWatch(getLog());
mainLog.println("\nConstructing " + model.getModelType() + "-" + da.getAutomataType() + " product...");
timer.start(model.getModelType() + "-" + da.getAutomataType() + " product");
product = mcLtl.constructProductModel(da, (MDP)model, labelBS, statesOfInterest);
timer.stop("product has " + product.getProductModel().infoString());
// Adapt reward info to product model
productRewards = ((MDPRewards) modelRewards).liftFromModel(product);
// Output product, if required
if (getExportProductTrans()) {
mainLog.println("\nExporting product transition matrix to file \"" + getExportProductTransFilename() + "\"...");
product.getProductModel().exportToPrismExplicitTra(getExportProductTransFilename());
}
if (getExportProductStates()) {
mainLog.println("\nExporting product state space to file \"" + getExportProductStatesFilename() + "\"...");
PrismFileLog out = new PrismFileLog(getExportProductStatesFilename());
VarList newVarList = (VarList) modulesFile.createVarList().clone();
String daVar = "_da";
while (newVarList.getIndex(daVar) != -1) {
daVar = "_" + daVar;
}
newVarList.addVar(0, new Declaration(daVar, new DeclarationIntUnbounded()), 1, null);
product.getProductModel().exportStates(Prism.EXPORT_PLAIN, newVarList, out);
out.close();
}
// Find accepting states + compute reachability rewards
BitSet acc = ((AcceptanceReach)product.getAcceptance()).getGoalStates();
mainLog.println("\nComputing reachability rewards...");
mcProduct = new MDPModelChecker(this);
mcProduct.inheritSettings(this);
ModelCheckerResult res = mcProduct.computeReachRewards((MDP)product.getProductModel(), productRewards, acc, minMax.isMin());
rewardsProduct = StateValues.createFromDoubleArray(res.soln, product.getProductModel());
// Output vector over product, if required
if (getExportProductVector()) {
mainLog.println("\nExporting product solution vector to file \"" + getExportProductVectorFilename() + "\"...");
PrismFileLog out = new PrismFileLog(getExportProductVectorFilename());
rewardsProduct.print(out, false, false, false, false);
out.close();
}
// Map rewards back to the original model
rewards = product.projectToOriginalModel(rewardsProduct);
rewardsProduct.clear();
return rewards;
}
// Numerical computation functions
/**
* Compute next-state probabilities.
* i.e. compute the probability of being in a state in {@code target} in the next step.
* @param mdp The MDP
* @param target Target states
* @param min Min or max probabilities (true=min, false=max)
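* <p>
* A minimal usage sketch (the variable names {@code parent}, {@code mdp} and {@code target}
* are illustrative, not part of this API):
* <pre>{@code
* MDPModelChecker mc = new MDPModelChecker(parent);
* ModelCheckerResult res = mc.computeNextProbs(mdp, target, false); // Pmax of reaching 'target' in one step
* double[] probs = res.soln; // one value per state of the MDP
* }</pre>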
*/
public ModelCheckerResult computeNextProbs(MDP mdp, BitSet target, boolean min) throws PrismException
{
ModelCheckerResult res = null;
int n;
double soln[], soln2[];
long timer;
timer = System.currentTimeMillis();
// Store num states
n = mdp.getNumStates();
// Create/initialise solution vector(s)
soln = Utils.bitsetToDoubleArray(target, n);
soln2 = new double[n];
// Next-step probabilities
mdp.mvMultMinMax(soln, min, soln2, null, false, null);
// Return results
timer = System.currentTimeMillis() - timer;
res = new ModelCheckerResult();
res.soln = soln2;
res.numIters = 1;
res.timeTaken = timer / 1000.0;
return res;
}
/**
* Given a value vector x, compute the probability:
* v(s) = min/max sched [ Sum_s' P_sched(s,s')*x(s') ] for s labeled with a,
* v(s) = 0 for s not labeled with a.
*
* @param mdp the MDP
* @param a the set of states labeled with a
* @param x the value vector
* @param min compute min instead of max
*/
public double[] computeRestrictedNext(MDP mdp, BitSet a, double[] x, boolean min)
{
int n;
double soln[];
// Store num states
n = mdp.getNumStates();
// initialized to 0.0
soln = new double[n];
// Next-step probabilities multiplication
// restricted to a states
mdp.mvMultMinMax(x, min, soln, a, false, null);
return soln;
}
/**
* Compute reachability probabilities.
* i.e. compute the min/max probability of reaching a state in {@code target}.
* @param mdp The MDP
* @param target Target states
* @param min Min or max probabilities (true=min, false=max)
*/
public ModelCheckerResult computeReachProbs(MDP mdp, BitSet target, boolean min) throws PrismException
{
return computeReachProbs(mdp, null, target, min, null, null);
}
/**
* Compute until probabilities.
* i.e. compute the min/max probability of reaching a state in {@code target},
* while remaining in those in {@code remain}.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param min Min or max probabilities (true=min, false=max)
*/
public ModelCheckerResult computeUntilProbs(MDP mdp, BitSet remain, BitSet target, boolean min) throws PrismException
{
return computeReachProbs(mdp, remain, target, min, null, null);
}
/**
* Compute reachability/until probabilities.
* i.e. compute the min/max probability of reaching a state in {@code target},
* while remaining in those in {@code remain}.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param min Min or max probabilities (true=min, false=max)
* @param init Optionally, an initial solution vector (may be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
* Also, 'known' values cannot be passed for some solution methods, e.g. policy iteration.
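* <p>
* A minimal usage sketch (the variable names {@code mdp}, {@code safe} and {@code goal} are illustrative):
* <pre>{@code
* // Pmin of reaching 'goal' while remaining within 'safe'
* ModelCheckerResult res = computeReachProbs(mdp, safe, goal, true, null, null);
* double[] pmin = res.soln; // one value per state of the MDP
* }</pre>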
*/
public ModelCheckerResult computeReachProbs(MDP mdp, BitSet remain, BitSet target, boolean min, double init[], BitSet known) throws PrismException
{
ModelCheckerResult res = null;
BitSet no, yes;
int n, numYes, numNo;
long timer, timerProb0, timerProb1;
int strat[] = null;
PredecessorRelation pre = null;
// Local copy of setting
MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;
boolean doPmaxQuotient = this.doPmaxQuotient;
// Switch to a supported method, if necessary
if (mdpSolnMethod == MDPSolnMethod.LINEAR_PROGRAMMING) {
mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL;
mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\"");
}
// Check for some unsupported combinations
if (mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION && valIterDir == ValIterDir.ABOVE) {
if (!(precomp && prob0))
throw new PrismException("Precomputation (Prob0) must be enabled for value iteration from above");
if (!min)
throw new PrismException("Value iteration from above only works for minimum probabilities");
}
if (doIntervalIteration) {
if (!min && (genStrat || exportAdv)) {
throw new PrismNotSupportedException("Currently, explicit engine does not support adversary construction for interval iteration and Pmax");
}
if (mdpSolnMethod != MDPSolnMethod.VALUE_ITERATION && mdpSolnMethod != MDPSolnMethod.GAUSS_SEIDEL) {
throw new PrismNotSupportedException("Currently, explicit engine only supports interval iteration with value iteration or Gauss-Seidel for MDPs");
}
if (init != null)
throw new PrismNotSupportedException("Interval iteration currently not supported with provided initial values");
if (!(precomp && prob0 && prob1)) {
throw new PrismNotSupportedException("Precomputations (Prob0 & Prob1) must be enabled for interval iteration");
}
if (!min) {
doPmaxQuotient = true;
}
}
if (mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION || mdpSolnMethod == MDPSolnMethod.MODIFIED_POLICY_ITERATION) {
if (known != null) {
throw new PrismException("Policy iteration methods cannot be passed 'known' values for some states");
}
}
if (doPmaxQuotient && min) {
// for Pmin, don't do quotient
doPmaxQuotient = false;
}
// Start probabilistic reachability
timer = System.currentTimeMillis();
mainLog.println("\nStarting probabilistic reachability (" + (min ? "min" : "max") + ")...");
// Check for deadlocks in non-target states (because they break e.g. prob1)
mdp.checkForDeadlocks(target);
// Store num states
n = mdp.getNumStates();
// Optimise by enlarging target set (if more info is available)
if (init != null && known != null && !known.isEmpty()) {
BitSet targetNew = (BitSet) target.clone();
for (int i : new IterableBitSet(known)) {
if (init[i] == 1.0) {
targetNew.set(i);
}
}
target = targetNew;
}
// If required, export info about target states
if (getExportTarget()) {
BitSet bsInit = new BitSet(n);
for (int i = 0; i < n; i++) {
bsInit.set(i, mdp.isInitialState(i));
}
List<BitSet> labels = Arrays.asList(bsInit, target);
List<String> labelNames = Arrays.asList("init", "target");
mainLog.println("\nExporting target states info to file \"" + getExportTargetFilename() + "\"...");
exportLabels(mdp, labels, labelNames, Prism.EXPORT_PLAIN, new PrismFileLog(getExportTargetFilename()));
}
// If required, create/initialise strategy storage
// Set choices to -1, denoting unknown
// (except for target states, which are -2, denoting arbitrary)
if (genStrat || exportAdv) {
strat = new int[n];
for (int i = 0; i < n; i++) {
strat[i] = target.get(i) ? -2 : -1;
}
}
if (precomp && (prob0 || prob1) && preRel) {
pre = mdp.getPredecessorRelation(this, true);
}
// Precomputation
timerProb0 = System.currentTimeMillis();
if (precomp && prob0) {
if (pre != null) {
no = prob0(mdp, remain, target, min, strat, pre);
} else {
no = prob0(mdp, remain, target, min, strat);
}
} else {
no = new BitSet();
}
timerProb0 = System.currentTimeMillis() - timerProb0;
timerProb1 = System.currentTimeMillis();
if (precomp && prob1) {
if (pre != null) {
yes = prob1(mdp, remain, target, min, strat, pre);
} else {
yes = prob1(mdp, remain, target, min, strat);
}
} else {
yes = (BitSet) target.clone();
}
timerProb1 = System.currentTimeMillis() - timerProb1;
// Print results of precomputation
numYes = yes.cardinality();
numNo = no.cardinality();
mainLog.println("target=" + target.cardinality() + ", yes=" + numYes + ", no=" + numNo + ", maybe=" + (n - (numYes + numNo)));
// If still required, store strategy for no/yes (0/1) states.
// This is just for the cases max=0 and min=1, where arbitrary choices suffice (denoted by -2)
if (genStrat || exportAdv) {
if (min) {
for (int i = yes.nextSetBit(0); i >= 0; i = yes.nextSetBit(i + 1)) {
if (!target.get(i))
strat[i] = -2;
}
} else {
for (int i = no.nextSetBit(0); i >= 0; i = no.nextSetBit(i + 1)) {
strat[i] = -2;
}
}
}
// Compute probabilities (if needed)
if (numYes + numNo < n) {
if (!min && doPmaxQuotient) {
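// For Pmax (doPmaxQuotient is enabled above when interval iteration is used),
// perform the numeric computation on the quotient model built by maxQuotient
// and map the results back to the original state space afterwards.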
MDPEquiv maxQuotient = maxQuotient(mdp, yes, no);
// MDPEquiv retains original state space, making the states that are not used
// trap states.
// yesInQuotient is the representative for the yes equivalence class
BitSet yesInQuotient = new BitSet();
yesInQuotient.set(maxQuotient.mapStateToRestrictedModel(yes.nextSetBit(0)));
// noInQuotient is the representative for the no equivalence class as well
// as the non-representative states (the states in any equivalence class
// that are not the representative for the class). As the latter states
// are traps, we can just add them to the no set
BitSet noInQuotient = new BitSet();
noInQuotient.set(maxQuotient.mapStateToRestrictedModel(no.nextSetBit(0)));
noInQuotient.or(maxQuotient.getNonRepresentativeStates());
MDPSparse quotientModel = new MDPSparse(maxQuotient);
ModelCheckerResult res1 = computeReachProbsNumeric(quotientModel,
mdpSolnMethod,
noInQuotient,
yesInQuotient,
min,
init,
known,
strat);
res = new ModelCheckerResult();
res.numIters = res1.numIters;
res.timeTaken = res1.timeTaken;
res.soln = new double[mdp.getNumStates()];
for (int i = 0; i < n; i++) {
if (yes.get(i)) {
res.soln[i] = 1.0;
} else if (no.get(i)) {
res.soln[i] = 0.0;
} else {
res.soln[i] = res1.soln[maxQuotient.mapStateToRestrictedModel(i)];
}
}
} else {
res = computeReachProbsNumeric(mdp, mdpSolnMethod, no, yes, min, init, known, strat);
}
} else {
res = new ModelCheckerResult();
res.soln = Utils.bitsetToDoubleArray(yes, n);
}
// Finished probabilistic reachability
timer = System.currentTimeMillis() - timer;
mainLog.println("Probabilistic reachability took " + timer / 1000.0 + " seconds.");
// Store strategy
if (genStrat) {
res.strat = new MDStrategyArray(mdp, strat);
}
// Export adversary
if (exportAdv) {
// Prune strategy, if needed
if (getRestrictStratToReach()) {
restrictStrategyToReachableStates(mdp, strat);
}
// Export
PrismLog out = new PrismFileLog(exportAdvFilename);
new DTMCFromMDPMemorylessAdversary(mdp, strat).exportToPrismExplicitTra(out);
out.close();
}
// Update time taken
res.timeTaken = timer / 1000.0;
res.timeProb0 = timerProb0 / 1000.0;
res.timePre = (timerProb0 + timerProb1) / 1000.0;
return res;
}
protected ModelCheckerResult computeReachProbsNumeric(MDP mdp, MDPSolnMethod method, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[]) throws PrismException
{
ModelCheckerResult res = null;
IterationMethod iterationMethod = null;
switch (method) {
case VALUE_ITERATION:
iterationMethod = new IterationMethodPower(termCrit == TermCrit.ABSOLUTE, termCritParam);
break;
case GAUSS_SEIDEL:
iterationMethod = new IterationMethodGS(termCrit == TermCrit.ABSOLUTE, termCritParam, false);
break;
case POLICY_ITERATION:
if (doIntervalIteration) {
throw new PrismNotSupportedException("Interval iteration currently not supported for policy iteration");
}
res = computeReachProbsPolIter(mdp, no, yes, min, strat);
break;
case MODIFIED_POLICY_ITERATION:
if (doIntervalIteration) {
throw new PrismNotSupportedException("Interval iteration currently not supported for policy iteration");
}
res = computeReachProbsModPolIter(mdp, no, yes, min, strat);
break;
default:
throw new PrismException("Unknown MDP solution method " + method.fullName());
}
if (res == null) { // not yet computed, use iterationMethod
if (!doIntervalIteration) {
res = doValueIterationReachProbs(mdp, no, yes, min, init, known, iterationMethod, getDoTopologicalValueIteration(), strat);
} else {
res = doIntervalIterationReachProbs(mdp, no, yes, min, init, known, iterationMethod, getDoTopologicalValueIteration(), strat);
}
}
return res;
}
/**
* Prob0 precomputation algorithm (using fixpoint computation).
* i.e. determine the states of an MDP which, with min/max probability 0,
* reach a state in {@code target}, while remaining in those in {@code remain}.
* {@code min}=true gives Prob0E, {@code min}=false gives Prob0A.
* Optionally, for min only, store optimal (memoryless) strategy info for 0 states.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param min Min or max probabilities (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public BitSet prob0(MDPGeneric<?> mdp, BitSet remain, BitSet target, boolean min, int strat[])
{
int n, iters;
BitSet u, soln, unknown;
boolean u_done;
long timer;
// Start precomputation
timer = System.currentTimeMillis();
if (!silentPrecomputations)
mainLog.println("Starting Prob0 (" + (min ? "min" : "max") + ")...");
// Special case: no target states
if (target.cardinality() == 0) {
soln = new BitSet(mdp.getNumStates());
soln.set(0, mdp.getNumStates());
// for min, generate strategy, any choice (-2) is fine
if (min && strat != null) {
Arrays.fill(strat, -2);
}
return soln;
}
// Initialise vectors
n = mdp.getNumStates();
u = new BitSet(n);
soln = new BitSet(n);
// Determine the set of states we actually need to perform computation for
unknown = new BitSet();
unknown.set(0, n);
unknown.andNot(target);
if (remain != null)
unknown.and(remain);
// Fixed point loop
iters = 0;
u_done = false;
// Least fixed point - should start from 0 but we optimise by
// starting from 'target', thus bypassing first iteration
u.or(target);
soln.or(target);
while (!u_done) {
iters++;
// Single step of Prob0
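// (a state in 'unknown' joins the set if, for all choices (min) / some choice (max),
// some successor is already in the set)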
mdp.prob0step(unknown, u, min, soln);
// Check termination
u_done = soln.equals(u);
// u = soln
u.clear();
u.or(soln);
}
// Negate
u.flip(0, n);
// Finished precomputation
timer = System.currentTimeMillis() - timer;
if (!silentPrecomputations) {
mainLog.print("Prob0 (" + (min ? "min" : "max") + ")");
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
}
// If required, generate strategy. This is for min probs,
// so it can be done *after* the main prob0 algorithm (unlike for prob1).
// We simply pick, for all "no" states, the first choice for which all transitions stay in "no"
if (strat != null) {
for (int i = u.nextSetBit(0); i >= 0; i = u.nextSetBit(i + 1)) {
int numChoices = mdp.getNumChoices(i);
for (int k = 0; k < numChoices; k++) {
if (mdp.allSuccessorsInSet(i, k, u)) {
strat[i] = k;
break;
}
}
}
}
return u;
}
/**
* Prob0 precomputation algorithm (using predecessor relation).
* i.e. determine the states of an MDP which, with min/max probability 0,
* reach a state in {@code target}, while remaining in those in {@code remain}.
* {@code min}=true gives Prob0E, {@code min}=false gives Prob0A.
* Optionally, for min only, store optimal (memoryless) strategy info for 0 states.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param min Min or max probabilities (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* @param pre the predecessor relation
*/
public BitSet prob0(MDP mdp, BitSet remain, BitSet target, boolean min, int strat[], PredecessorRelation pre)
{
int n;
BitSet result, unknown;
long timer;
// Start precomputation
timer = System.currentTimeMillis();
mainLog.println("Starting Prob0 (" + (min ? "min" : "max") + ")...");
// Special case: no target states -> probability = 0 everywhere
if (target.isEmpty()) {
result = new BitSet(mdp.getNumStates());
result.set(0, mdp.getNumStates());
return result;
}
// Initialise vectors
n = mdp.getNumStates();
// Determine the set of states we actually need to perform computation for
unknown = BitSetTools.complement(n, target);
if (remain != null)
unknown.and(remain);
if (min) {
BitSet T = (BitSet) target.clone();
BitSet R = (BitSet) target.clone();
while (!R.isEmpty()) {
int t = R.nextSetBit(0);
R.clear(t);
for (int s : pre.getPre(t)) {
if (!unknown.get(s) || T.get(s)) continue;
boolean forAllSomeInT = true;
for (int choice = 0, choices = mdp.getNumChoices(s); choice < choices; choice++) {
boolean someInT = mdp.someSuccessorsInSet(s, choice, T);
if (!someInT) {
forAllSomeInT = false;
break;
}
}
if (forAllSomeInT) {
T.set(s);
R.set(s);
}
}
}
result = T;
// result = S \ T
result.flip(0, n);
} else {
// E [ remain U target ]
result = pre.calculatePreStar(remain, target, target);
// Pmax=0 <=> ! E [ remain U target ] -> complement
result.flip(0, n);
}
// Finished precomputation
timer = System.currentTimeMillis() - timer;
mainLog.print("Prob0 (" + (min ? "min" : "max") + ")");
mainLog.println(" took " + timer / 1000.0 + " seconds.");
// If required, generate strategy. This is for min probs,
// so it can be done *after* the main prob0 algorithm (unlike for prob1).
// We simply pick, for all "no" states, the first choice for which all transitions stay in "no"
if (strat != null) {
for (int i = result.nextSetBit(0); i >= 0; i = result.nextSetBit(i + 1)) {
int numChoices = mdp.getNumChoices(i);
for (int k = 0; k < numChoices; k++) {
if (mdp.allSuccessorsInSet(i, k, result)) {
strat[i] = k;
break;
}
}
}
}
return result;
}
/**
* Prob1 precomputation algorithm (using fixpoint computation).
* i.e. determine the states of an MDP which, with min/max probability 1,
* reach a state in {@code target}, while remaining in those in {@code remain}.
* {@code min}=true gives Prob1A, {@code min}=false gives Prob1E.
* Optionally, for max only, store optimal (memoryless) strategy info for 1 states.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param min Min or max probabilities (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public BitSet prob1(MDPGeneric<?> mdp, BitSet remain, BitSet target, boolean min, int strat[])
{
int n, iters;
BitSet u, v, soln, unknown;
boolean u_done, v_done;
long timer;
// Start precomputation
timer = System.currentTimeMillis();
if (!silentPrecomputations)
mainLog.println("Starting Prob1 (" + (min ? "min" : "max") + ")...");
// Special case: no target states
if (target.cardinality() == 0) {
return new BitSet(mdp.getNumStates());
}
// Initialise vectors
n = mdp.getNumStates();
u = new BitSet(n);
v = new BitSet(n);
soln = new BitSet(n);
// Determine the set of states we actually need to perform computation for
unknown = new BitSet();
unknown.set(0, n);
unknown.andNot(target);
if (remain != null)
unknown.and(remain);
// Nested fixed point loop
iters = 0;
u_done = false;
// Greatest fixed point
u.set(0, n);
while (!u_done) {
v_done = false;
// Least fixed point - should start from 0 but we optimise by
// starting from 'target', thus bypassing first iteration
v.clear();
v.or(target);
soln.clear();
soln.or(target);
while (!v_done) {
iters++;
// Single step of Prob1
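// (a state in 'unknown' is kept if, for all choices (Prob1A) / some choice (Prob1E),
// all successors stay in u and some successor is already in v)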
if (min)
mdp.prob1Astep(unknown, u, v, soln);
else
mdp.prob1Estep(unknown, u, v, soln, null);
// Check termination (inner)
v_done = soln.equals(v);
// v = soln
v.clear();
v.or(soln);
}
// Check termination (outer)
u_done = v.equals(u);
// u = v
u.clear();
u.or(v);
}
// If we need to generate a strategy, do another iteration of the inner loop for this
// We could do this during the main double fixed point above, but we would generate surplus
// strategy info for non-1 states during early iterations of the outer loop,
// which are not straightforward to remove since this method does not know which states
// already have valid strategy info from Prob0.
// Notice that we only need to look at states in u (since we already know the answer),
// so we restrict 'unknown' further
unknown.and(u);
if (!min && strat != null) {
v_done = false;
v.clear();
v.or(target);
soln.clear();
soln.or(target);
while (!v_done) {
mdp.prob1Estep(unknown, u, v, soln, strat);
v_done = soln.equals(v);
v.clear();
v.or(soln);
}
u_done = v.equals(u);
}
// Finished precomputation
timer = System.currentTimeMillis() - timer;
if (!silentPrecomputations) {
mainLog.print("Prob1 (" + (min ? "min" : "max") + ")");
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
}
return u;
}
/**
* Prob1 precomputation algorithm (using predecessor relation).
* i.e. determine the states of an MDP which, with min/max probability 1,
* reach a state in {@code target}, while remaining in those in {@code remain}.
* {@code min}=true gives Prob1A, {@code min}=false gives Prob1E.
* Optionally, for max only, store optimal (memoryless) strategy info for 1 states.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param min Min or max probabilities (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* @param pre the predecessor relation
*/
public BitSet prob1(MDP mdp, BitSet remain, BitSet target, boolean min, int strat[], PredecessorRelation pre)
{
BitSet result;
long timer;
// Start precomputation
timer = System.currentTimeMillis();
mainLog.println("Starting Prob1 (" + (min ? "min" : "max") + ")...");
// Special case: no target states -> probability = 0 everywhere
if (target.isEmpty()) {
return new BitSet();
}
if (min) {
result = prob1a(mdp, remain, target, pre);
} else {
result = prob1e(mdp, remain, target, strat, pre);
}
// Finished precomputation
timer = System.currentTimeMillis() - timer;
mainLog.print("Prob1 (" + (min ? "min" : "max") + ")");
mainLog.println(" took " + timer / 1000.0 + " seconds.");
return result;
}
/**
* Prob1A (Pmin=1) precomputation algorithm (using predecessor relation).
* Determines the states of an MDP which, with min probability 1,
* reach a state in {@code target}, while remaining in those in {@code remain}.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param pre predecessor relation
* @return the set of states with Pmin=1[ remain U target ]
*/
private BitSet prob1a(MDP mdp, BitSet remain, BitSet target, PredecessorRelation pre)
{
// this is an adaptation of the Smin=1 algorithm in the Principles of Model Checking book
// with added support for constrained reachability (remain U target)
int n = mdp.getNumStates();
// construct an explicit set of remaining states in case remain
// is given implicitly (null = all states)
if (remain == null) {
remain = new BitSet();
remain.set(0,n);
}
// Z
// = (S \ remain) \ target
// = the states that satisfy neither
// remain nor target, i.e., where we know
// a priori that they have probability 0,
// as E[ remain U target ] is definitely false
BitSet Z = new BitSet();
Z.set(0,n);
Z.andNot(remain);
Z.andNot(target);
// mainLog.println("Z = " + Z);
// The set of states that are not target states
BitSet notTarget = BitSetTools.complement(n, target);
// mainLog.println("notTarget = " + notTarget);
// The set of states that can reach Z without visiting
// any target states.
// We know that for those states Pmin<1, as we can
// reach a Z state with positive probability
BitSet canReachZ = pre.calculatePreStar(notTarget, Z, Z);
// mainLog.println("canReachZ = " + canReachZ);
// We now iteratively compute the set T of states
// in !canReachZ that have a strategy for avoiding
// the target states ("safe states")
// B = unsafe states, have Pmin>0 for reaching target
// initialised with the target states
BitSet B = (BitSet) target.clone();
// T = potentially safe states, initialised with
// ( S \ B ) \ canReachZ
BitSet T = BitSetTools.complement(n, B);
T.andNot(canReachZ);
// E = unsafe states that still have to be removed from T
BitSetAndQueue E = new BitSetAndQueue(B);
while (!E.isEmpty()) {
int t = E.dequeue();
// for removal, look at the predecessors
// and remove choices that will lead to B with probability > 0
for (int s : pre.getPre(t)) {
// predecessor already in B, continue
if (B.get(s)) {
continue;
}
// is there a choice that allows us to avoid B?
boolean existsChoiceAvoidingB = false;
for (int choice = 0, choices = mdp.getNumChoices(s); choice < choices; choice++) {
if (!mdp.someSuccessorsInSet(s, choice, B)) {
existsChoiceAvoidingB = true;
break;
}
}
// if there is no such choice, we have to remove s
if (!existsChoiceAvoidingB) {
// add to queue
E.enqueue(s);
// add to unsafe states B
B.set(s);
// remove from safe states T
T.clear(s);
}
}
}
// The states in remain that are not target states
BitSet remainAndNotTarget = BitSetTools.minus(remain, target);
// spoilerStates = all states in T and all states that can reach Z without visiting B
BitSet spoilerStates = BitSetTools.union(T, canReachZ);
// canReachSpoilerState = there exists a strategy to reach a spoiler state with positive
// probability => Pmin<1 ( remain U target )
BitSet canReachSpoilerState = pre.calculatePreStar(remainAndNotTarget, spoilerStates, spoilerStates);
// mainLog.println("canReachSpoilerState = " + canReachSpoilerState);
// We are interested in Pmin=1 ( remain U target ),
// which we obtain by complementing, yielding the set of states
// where there is no way to avoid remain U target
BitSet result = BitSetTools.complement(n, canReachSpoilerState);
// mainLog.println("result = " + result);
return result;
}
/**
* Prob1E (Pmax=1) precomputation algorithm (using predecessor relation).
* Determines the states of an MDP which, with max probability 1,
* reach a state in {@code target}, while remaining in those in {@code remain}.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param pre predecessor relation
* @return the set of states with Pmax=1[ remain U target ]
*/
private BitSet prob1e(MDP mdp, BitSet remain, BitSet target, int strat[], PredecessorRelation pre)
{
// This algorithm is an adaptation of the Smax=1 algorithm
// in the Principles of Model Checking book
int n = mdp.getNumStates();
// Which choices remain enabled?
ChoicesMask enabledChoices = new ChoicesMask(mdp);
// We count the remaining, enabled choices in each
// state so we can easily determine when
// there are no more enabled choices for a state
int[] remainingChoices = new int[n];
for (int s = 0; s < n; s++) {
remainingChoices[s] = mdp.getNumChoices(s);
}
// the set of states that can reach the target, i.e. E[ remain U target ]
BitSet canReachTarget = pre.calculatePreStar(remain, target, target);
// the set of states that can't reach the target, i.e. !E[ remain U target ]
BitSet cantReachTarget = BitSetTools.complement(n, canReachTarget);
// in addition to the PredecessorRelation, we need more detailed
// information about the incoming choices for each state
IncomingChoiceRelation incoming = IncomingChoiceRelation.forModel(this, mdp);
// in each iteration step, U is the set of states that need to be
// removed in this iteration because they can't reach the target
BitSet U = cantReachTarget;
// unknown is the set of states that can still reach the target,
// but might need to be removed later on
BitSet unknown = (BitSet) canReachTarget.clone();
unknown.andNot(target);
// unsafe are the states that have been determined to have Pmax<1[ remain U target ]
BitSet unsafe = (BitSet) cantReachTarget.clone();
int iterations = 0;
while (!U.isEmpty()) {
iterations++;
// mainLog.println("Iteration " + iterations +": " +U);
// states to remove in this iteration step:
// all states in U (which can no longer reach the target)
// and other states for which we have determined that they
// no longer have any choices that remain in unknown
BitSetAndQueue toRemove = new BitSetAndQueue(U);
while (!toRemove.isEmpty()) {
int t = toRemove.dequeue();
// for each state t, we consider the incoming choices
for (IncomingChoiceRelation.Choice choice : incoming.getIncomingChoices(t)) {
// s is the predecessor of t, i.e., P(s,c,t) > 0
int s = choice.getState();
int c = choice.getChoice();
if (!enabledChoices.isEnabled(s,c)) {
// already disabled
continue;
}
if (!unknown.get(s)) {
// state already processed
continue;
}
// We disable the choice and decrement the corresponding counter for s
enabledChoices.disableChoice(s, choice.getChoice());
remainingChoices[s]--;
// if there are no remaining choices left, remove s
if (remainingChoices[s] == 0) {
// s is not in unknown anymore
unknown.clear(s);
// s has become unsafe
unsafe.set(s);
// and we have to process the removal of s
toRemove.enqueue(s);
}
}
}
// after removal, it may be the case that states in unknown that could
// previously reach the target can no longer do so,
// so we recompute canReachTarget, but now allowing only the
// choices that are still enabled
canReachTarget = incoming.calculatePreStar(unknown, target, target, enabledChoices);
// we compute the set of states that have become unsafe because they
// can no longer reach the target:
// U = unknown states that cannot reach the target
U = BitSetTools.minus(unknown, canReachTarget);
// remove U from unknown
unknown.andNot(U);
// mark all U states as unsafe
unsafe.or(U);
// do loop once more, now with updated U
}
// the resulting set of states consists of the states in target
// and those in unknown, as the latter have not been removed
// during the iterations
BitSet result = BitSetTools.union(unknown, target);
if (strat != null) {
// we have to generate a strategy for all remaining states
BitSet done = new BitSet();
BitSetAndQueue todo = new BitSetAndQueue(target);
while (!todo.isEmpty()) {
int t = todo.dequeue();
if (done.get(t))
continue;
for (Choice choice: incoming.getIncomingChoices(t)) {
int s = choice.getState();
if (done.get(s)) {
continue;
}
if (target.get(s)) {
// target states don't need treatment
continue;
}
int ch = choice.getChoice();
if (!enabledChoices.isEnabled(s, ch)) {
// choice is not a prob1e choice
continue;
}
// s should be a prob1e \ target state at this point
assert(unknown.get(s));
if (strat[s] == -1) {
// does not have a fixed choice yet
// fix the choice, as this is the
// "earliest" moment where s has been discovered,
// guaranteeing that taking ch will almost surely
// make progress towards the target
strat[s] = ch;
todo.enqueue(s);
} else {
// as strategy is fixed, was already
// enqueued, nothing to do
}
}
}
}
return result;
}
/**
* Compute reachability probabilities using value iteration.
* Optionally, store optimal (memoryless) strategy info.
* @param mdp The MDP
* @param no Probability 0 states
* @param yes Probability 1 states
* @param min Min or max probabilities (true=min, false=max)
* @param init Optionally, an initial solution vector (will be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
protected ModelCheckerResult computeReachProbsValIter(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[])
throws PrismException
{
IterationMethodPower iterationMethod = new IterationMethodPower(termCrit == TermCrit.ABSOLUTE, termCritParam);
return doValueIterationReachProbs(mdp, no, yes, min, init, known, iterationMethod, false, strat);
}
/**
* Compute reachability probabilities using value iteration.
* Optionally, store optimal (memoryless) strategy info.
* @param mdp The MDP
* @param no Probability 0 states
* @param yes Probability 1 states
* @param min Min or max probabilities (true=min, false=max)
* @param init Optionally, an initial solution vector (will be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* @param iterationMethod The iteration method
* @param topological Do topological value iteration?
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
protected ModelCheckerResult doValueIterationReachProbs(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, IterationMethod iterationMethod, boolean topological, int strat[])
throws PrismException
{
BitSet unknown;
int i, n;
double initVal;
long timer;
// Start value iteration
timer = System.currentTimeMillis();
String description = (min ? "min" : "max")
+ (topological ? ", topological": "" )
+ ", with " + iterationMethod.getDescriptionShort();
mainLog.println("Starting value iteration (" + description + ")...");
ExportIterations iterationsExport = null;
if (settings.getBoolean(PrismSettings.PRISM_EXPORT_ITERATIONS)) {
iterationsExport = new ExportIterations("Explicit MDP ReachProbs value iteration (" + description + ")");
mainLog.println("Exporting iterations to " + iterationsExport.getFileName());
}
// Store num states
n = mdp.getNumStates();
// Initialise solution vectors. Use (where available) the following in order of preference:
// (1) exact answer, if already known; (2) 1.0/0.0 if in yes/no; (3) passed in initial value; (4) initVal
// where initVal is 0.0 or 1.0, depending on whether we converge from below/above.
initVal = (valIterDir == ValIterDir.BELOW) ? 0.0 : 1.0;
if (init != null) {
if (known != null) {
for (i = 0; i < n; i++)
init[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
} else {
for (i = 0; i < n; i++)
init[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
}
} else {
init = new double[n];
for (i = 0; i < n; i++)
init[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : initVal;
}
// Determine the set of states we actually need to compute values for
unknown = new BitSet();
unknown.set(0, n);
unknown.andNot(yes);
unknown.andNot(no);
if (known != null)
unknown.andNot(known);
if (iterationsExport != null)
iterationsExport.exportVector(init, 0);
IterationMethod.IterationValIter iteration = iterationMethod.forMvMultMinMax(mdp, min, strat);
iteration.init(init);
IntSet unknownStates = IntSet.asIntSet(unknown);
if (topological) {
// Compute SCCInfo, including trivial SCCs in the subgraph obtained when only considering
// states in unknown
SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);
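// For a trivial (singleton) SCC, its value can be computed directly with a single
// Jacobi-style min/max update of that state: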
IterationMethod.SingletonSCCSolver singletonSCCSolver = (int s, double[] soln) -> {
soln[s] = mdp.mvMultJacMinMaxSingle(s, soln, min, strat);
};
// run the actual value iteration
return iterationMethod.doTopologicalValueIteration(this, description, sccs, iteration, singletonSCCSolver, timer, iterationsExport);
} else {
// run the actual value iteration
return iterationMethod.doValueIteration(this, description, iteration, unknownStates, timer, iterationsExport);
}
}
/**
* Compute reachability probabilities using interval iteration.
* Optionally, store optimal (memoryless) strategy info.
* @param mdp The MDP
* @param no Probability 0 states
* @param yes Probability 1 states
* @param min Min or max probabilities (true=min, false=max)
* @param init Optionally, an initial solution vector (will be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* @param iterationMethod The iteration method
* @param topological Do topological value iteration?
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
protected ModelCheckerResult doIntervalIterationReachProbs(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, IterationMethod iterationMethod, boolean topological, int strat[])
throws PrismException
{
BitSet unknown;
int i, n;
double initBelow[], initAbove[];
long timer;
// Start interval iteration
timer = System.currentTimeMillis();
String description = (min ? "min" : "max")
+ (topological ? ", topological": "" )
+ ", with " + iterationMethod.getDescriptionShort();
mainLog.println("Starting interval iteration (" + description + ")...");
ExportIterations iterationsExport = null;
if (settings.getBoolean(PrismSettings.PRISM_EXPORT_ITERATIONS)) {
iterationsExport = new ExportIterations("Explicit MDP ReachProbs interval iteration (" + description + ")");
mainLog.println("Exporting iterations to " + iterationsExport.getFileName());
}
// Store num states
n = mdp.getNumStates();
// Create solution vector(s)
initBelow = (init == null) ? new double[n] : init;
initAbove = new double[n];
// Initialise solution vectors. Use (where available) the following in order of preference:
// (1) exact answer, if already known; (2) 1.0/0.0 if in yes/no; (3) initVal
// where initVal is 0.0 or 1.0, depending on whether we converge from below/above.
if (known != null && init != null) {
for (i = 0; i < n; i++) {
initBelow[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : 0.0;
initAbove[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : 1.0;
}
} else {
for (i = 0; i < n; i++) {
initBelow[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : 0.0;
initAbove[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : 1.0;
}
}
// Determine the set of states we actually need to compute values for
unknown = new BitSet();
unknown.set(0, n);
unknown.andNot(yes);
unknown.andNot(no);
if (known != null)
unknown.andNot(known);
if (iterationsExport != null) {
iterationsExport.exportVector(initBelow, 0);
iterationsExport.exportVector(initAbove, 1);
}
OptionsIntervalIteration iiOptions = OptionsIntervalIteration.from(this);
final boolean enforceMonotonicFromBelow = iiOptions.isEnforceMonotonicityFromBelow();
final boolean enforceMonotonicFromAbove = iiOptions.isEnforceMonotonicityFromAbove();
final boolean checkMonotonic = iiOptions.isCheckMonotonicity();
if (!enforceMonotonicFromAbove) {
getLog().println("Note: Interval iteration is configured to not enforce monotonicity from above.");
}
if (!enforceMonotonicFromBelow) {
getLog().println("Note: Interval iteration is configured to not enforce monotonicity from below.");
}
IterationMethod.IterationIntervalIter below = iterationMethod.forMvMultMinMaxInterval(mdp, min, strat, true, enforceMonotonicFromBelow, checkMonotonic);
IterationMethod.IterationIntervalIter above = iterationMethod.forMvMultMinMaxInterval(mdp, min, strat, false, enforceMonotonicFromAbove, checkMonotonic);
below.init(initBelow);
above.init(initAbove);
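// 'below' and 'above' bracket the exact probabilities: iteration from below starts at 0
// for unknown states, iteration from above at 1, and the iteration methods invoked below
// terminate once the two vectors are sufficiently close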
IntSet unknownStates = IntSet.asIntSet(unknown);
if (topological) {
// Compute SCCInfo, including trivial SCCs in the subgraph obtained when only considering
// states in unknown
SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);
IterationMethod.SingletonSCCSolver singletonSCCSolver = (int s, double[] soln) -> {
soln[s] = mdp.mvMultJacMinMaxSingle(s, soln, min, strat);
};
// run the actual interval iteration
return iterationMethod.doTopologicalIntervalIteration(this, description, sccs, below, above, singletonSCCSolver, timer, iterationsExport);
} else {
// run the actual interval iteration
return iterationMethod.doIntervalIteration(this, description, below, above, unknownStates, timer, iterationsExport);
}
}
/**
* Compute reachability probabilities using Gauss-Seidel (including Jacobi-style updates).
* @param mdp The MDP
* @param no Probability 0 states
* @param yes Probability 1 states
* @param min Min or max probabilities (true=min, false=max)
* @param init Optionally, an initial solution vector (will be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
protected ModelCheckerResult computeReachProbsGaussSeidel(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[])
throws PrismException
{
IterationMethodGS iterationMethod = new IterationMethodGS(termCrit == TermCrit.ABSOLUTE, termCritParam, false);
return doValueIterationReachProbs(mdp, no, yes, min, init, known, iterationMethod, false, strat);
}
/**
* Compute reachability probabilities using policy iteration.
* Optionally, store optimal (memoryless) strategy info.
* @param mdp The MDP
* @param no Probability 0 states
* @param yes Probability 1 states
* @param min Min or max probabilities (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
protected ModelCheckerResult computeReachProbsPolIter(MDP mdp, BitSet no, BitSet yes, boolean min, int strat[]) throws PrismException
{
ModelCheckerResult res;
int i, n, iters, totalIters;
double soln[], soln2[];
boolean done;
long timer;
DTMCModelChecker mcDTMC;
DTMC dtmc;
// Re-use solution to solve each new policy (strategy)?
boolean reUseSoln = true;
// Start policy iteration
timer = System.currentTimeMillis();
mainLog.println("Starting policy iteration (" + (min ? "min" : "max") + ")...");
// Create a DTMC model checker (for solving policies)
mcDTMC = new DTMCModelChecker(this);
mcDTMC.inheritSettings(this);
mcDTMC.setLog(new PrismDevNullLog());
// Store num states
n = mdp.getNumStates();
// Create solution vectors
soln = new double[n];
soln2 = new double[n];
// Initialise solution vectors.
for (i = 0; i < n; i++)
soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0;
// If not passed in, create new storage for strategy and initialise
// Initial strategy just picks first choice (0) everywhere
if (strat == null) {
strat = new int[n];
for (i = 0; i < n; i++)
strat[i] = 0;
}
// Otherwise, just initialise for states not in yes/no
// (Optimal choices for yes/no should already be known)
else {
for (i = 0; i < n; i++)
if (!(no.get(i) || yes.get(i)))
strat[i] = 0;
}
boolean backwardsGS = (linEqMethod == LinEqMethod.BACKWARDS_GAUSS_SEIDEL);
// Start iterations
iters = totalIters = 0;
done = false;
while (!done) {
iters++;
// Solve induced DTMC for strategy
dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, reUseSoln ? soln : null, null, backwardsGS);
soln = res.soln;
totalIters += res.numIters;
// Check if optimal, improve non-optimal choices
mdp.mvMultMinMax(soln, min, soln2, null, false, null);
done = true;
for (i = 0; i < n; i++) {
// Don't look at no/yes states - we may not have strategy info for them,
// so they might appear non-optimal
if (no.get(i) || yes.get(i))
continue;
if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
done = false;
List<Integer> opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]);
// Only update strategy if strictly better
if (!opt.contains(strat[i]))
strat[i] = opt.get(0);
}
}
}
// Finished policy iteration
timer = System.currentTimeMillis() - timer;
mainLog.print("Policy iteration");
mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");
// Return results
// (Note we don't add the strategy - the one passed in is already there
// and might have some existing choices stored for other states).
res = new ModelCheckerResult();
res.soln = soln;
res.numIters = totalIters;
res.timeTaken = timer / 1000.0;
return res;
}
/**
* Compute reachability probabilities using modified policy iteration.
* @param mdp The MDP
* @param no Probability 0 states
* @param yes Probability 1 states
* @param min Min or max probabilities (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
protected ModelCheckerResult computeReachProbsModPolIter(MDP mdp, BitSet no, BitSet yes, boolean min, int strat[]) throws PrismException
{
ModelCheckerResult res;
int i, n, iters, totalIters;
double soln[], soln2[];
boolean done;
long timer;
DTMCModelChecker mcDTMC;
DTMC dtmc;
// Start modified policy iteration
timer = System.currentTimeMillis();
mainLog.println("Starting modified policy iteration (" + (min ? "min" : "max") + ")...");
// Create a DTMC model checker (for solving policies)
mcDTMC = new DTMCModelChecker(this);
mcDTMC.inheritSettings(this);
mcDTMC.setLog(new PrismDevNullLog());
// Limit iters for DTMC solution - this implements "modified" policy iteration
mcDTMC.setMaxIters(100);
mcDTMC.setErrorOnNonConverge(false);
// Store num states
n = mdp.getNumStates();
// Create solution vectors
soln = new double[n];
soln2 = new double[n];
// Initialise solution vectors.
for (i = 0; i < n; i++)
soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0;
// If not passed in, create new storage for strategy and initialise
// Initial strategy just picks first choice (0) everywhere
if (strat == null) {
strat = new int[n];
for (i = 0; i < n; i++)
strat[i] = 0;
}
// Otherwise, just initialise for states not in yes/no
// (Optimal choices for yes/no should already be known)
else {
for (i = 0; i < n; i++)
if (!(no.get(i) || yes.get(i)))
strat[i] = 0;
}
boolean backwardsGS = (linEqMethod == LinEqMethod.BACKWARDS_GAUSS_SEIDEL);
// Start iterations
iters = totalIters = 0;
done = false;
while (!done) {
iters++;
// Solve induced DTMC for strategy
dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, soln, null, backwardsGS);
soln = res.soln;
totalIters += res.numIters;
// Check if optimal, improve non-optimal choices
mdp.mvMultMinMax(soln, min, soln2, null, false, null);
done = true;
for (i = 0; i < n; i++) {
// Don't look at no/yes states - we don't store strategy info for them,
// so they might appear non-optimal
if (no.get(i) || yes.get(i))
continue;
if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
done = false;
List<Integer> opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]);
strat[i] = opt.get(0);
}
}
}
// Finished policy iteration
timer = System.currentTimeMillis() - timer;
mainLog.print("Modified policy iteration");
mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");
// Return results
// (Note we don't add the strategy - the one passed in is already there
// and might have some existing choices stored for other states).
res = new ModelCheckerResult();
res.soln = soln;
res.numIters = totalIters;
res.timeTaken = timer / 1000.0;
return res;
}
/**
* Construct strategy information for min/max reachability probabilities.
* (More precisely, list of indices of choices resulting in min/max.)
* (Note: indices are guaranteed to be sorted in ascending order.)
* @param mdp The MDP
* @param state The state to generate strategy info for
* @param target The set of target states to reach
* @param min Min or max probabilities (true=min, false=max)
* @param lastSoln Vector of values from which to recompute in one iteration
*/
public List<Integer> probReachStrategy(MDP mdp, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException
{
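// Recompute the optimal value for this state in one step from lastSoln, then return all choices that attain it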
double val = mdp.mvMultMinMaxSingle(state, lastSoln, min, null);
return mdp.mvMultMinMaxSingleChoices(state, lastSoln, min, val);
}
/**
* Compute bounded reachability probabilities.
* i.e. compute the min/max probability of reaching a state in {@code target} within k steps.
* @param mdp The MDP
* @param target Target states
* @param k Bound
* @param min Min or max probabilities (true=min, false=max)
*/
public ModelCheckerResult computeBoundedReachProbs(MDP mdp, BitSet target, int k, boolean min) throws PrismException
{
return computeBoundedReachProbs(mdp, null, target, k, min, null, null);
}
/**
* Compute bounded until probabilities.
* i.e. compute the min/max probability of reaching a state in {@code target},
* within k steps, and while remaining in states in {@code remain}.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param k Bound
* @param min Min or max probabilities (true=min, false=max)
*/
public ModelCheckerResult computeBoundedUntilProbs(MDP mdp, BitSet remain, BitSet target, int k, boolean min) throws PrismException
{
return computeBoundedReachProbs(mdp, remain, target, k, min, null, null);
}
/**
* Compute bounded reachability/until probabilities.
* i.e. compute the min/max probability of reaching a state in {@code target},
* within k steps, and while remaining in states in {@code remain}.
* @param mdp The MDP
* @param remain Remain in these states (optional: null means "all")
* @param target Target states
* @param k Bound
* @param min Min or max probabilities (true=min, false=max)
* @param init Optionally, an initial solution vector (may be overwritten)
* @param results Optional array of size k+1 to store (init state) results for each step (null if unused)
*/
public ModelCheckerResult computeBoundedReachProbs(MDP mdp, BitSet remain, BitSet target, int k, boolean min, double init[], double results[])
throws PrismException
{
ModelCheckerResult res = null;
BitSet unknown;
int i, n, iters;
double soln[], soln2[], tmpsoln[];
long timer;
// Start bounded probabilistic reachability
timer = System.currentTimeMillis();
mainLog.println("\nStarting bounded probabilistic reachability (" + (min ? "min" : "max") + ")...");
// Store num states
n = mdp.getNumStates();
// Create solution vector(s)
soln = new double[n];
soln2 = (init == null) ? new double[n] : init;
// Initialise solution vectors. Use passed in initial vector, if present
if (init != null) {
for (i = 0; i < n; i++)
soln[i] = soln2[i] = target.get(i) ? 1.0 : init[i];
} else {
for (i = 0; i < n; i++)
soln[i] = soln2[i] = target.get(i) ? 1.0 : 0.0;
}
// Store intermediate results if required
// (compute min/max value over initial states for first step)
if (results != null) {
// TODO: whether this is min or max should be specified somehow
results[0] = Utils.minMaxOverArraySubset(soln2, mdp.getInitialStates(), true);
}
// Determine set of states actually need to perform computation for
unknown = new BitSet();
unknown.set(0, n);
unknown.andNot(target);
if (remain != null)
unknown.and(remain);
// Start iterations
iters = 0;
while (iters < k) {
iters++;
// Matrix-vector multiply and min/max ops
mdp.mvMultMinMax(soln, min, soln2, unknown, false, null);
// Store intermediate results if required
// (compute min/max value over initial states for this step)
if (results != null) {
// TODO: whether this is min or max should be specified somehow
results[iters] = Utils.minMaxOverArraySubset(soln2, mdp.getInitialStates(), true);
}
// Swap vectors for next iter
tmpsoln = soln;
soln = soln2;
soln2 = tmpsoln;
}
// Finished bounded probabilistic reachability
timer = System.currentTimeMillis() - timer;
mainLog.print("Bounded probabilistic reachability (" + (min ? "min" : "max") + ")");
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
// Return results
res = new ModelCheckerResult();
res.soln = soln;
res.lastSoln = soln2;
res.numIters = iters;
res.timeTaken = timer / 1000.0;
res.timePre = 0.0;
return res;
}
/**
* Compute expected cumulative (step-bounded) rewards.
* i.e. compute the min/max reward accumulated within {@code k} steps.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param k The number of steps
* @param min Min or max rewards (true=min, false=max)
*/
public ModelCheckerResult computeCumulativeRewards(MDP mdp, MDPRewards mdpRewards, int k, boolean min) throws PrismException
{
ModelCheckerResult res = null;
int i, n, iters;
long timer;
double soln[], soln2[], tmpsoln[];
// Start expected cumulative reward
timer = System.currentTimeMillis();
mainLog.println("\nStarting expected cumulative reward (" + (min ? "min" : "max") + ")...");
// Store num states
n = mdp.getNumStates();
// Create/initialise solution vector(s)
soln = new double[n];
soln2 = new double[n];
for (i = 0; i < n; i++)
soln[i] = soln2[i] = 0.0;
// Start iterations
iters = 0;
while (iters < k) {
iters++;
// Matrix-vector multiply and min/max ops
mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, null);
// Swap vectors for next iter
tmpsoln = soln;
soln = soln2;
soln2 = tmpsoln;
}
// Finished expected cumulative reward
timer = System.currentTimeMillis() - timer;
mainLog.print("Expected cumulative reward (" + (min ? "min" : "max") + ")");
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
// Return results
res = new ModelCheckerResult();
res.soln = soln;
res.numIters = iters;
res.timeTaken = timer / 1000.0;
return res;
}
/**
* Compute upper bound for maximum expected reward, with the method specified in the settings.
* @param mdp the model
* @param mdpRewards the rewards
* @param target the target states
* @param unknown the states that are not target or infinity states
* @param inf the infinite states
* @return upper bound on Rmax=?[ F target ] for all states
*/
double computeReachRewardsMaxUpperBound(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
{
if (unknown.isEmpty()) {
mainLog.println("Skipping upper bound computation, no unknown states...");
return 0;
}
// inf and target states become trap states (with dropped choices)
BitSet trapStates = (BitSet) target.clone();
trapStates.or(inf);
MDP cleanedMDP = new MDPDroppedAllChoices(mdp, trapStates);
OptionsIntervalIteration iiOptions = OptionsIntervalIteration.from(this);
double upperBound = 0.0;
String method = null;
switch (iiOptions.getBoundMethod()) {
case VARIANT_1_COARSE:
upperBound = computeReachRewardsMaxUpperBoundVariant1Coarse(cleanedMDP, mdpRewards, target, unknown, inf);
method = "variant 1, coarse";
break;
case VARIANT_1_FINE:
upperBound = computeReachRewardsMaxUpperBoundVariant1Fine(cleanedMDP, mdpRewards, target, unknown, inf);
method = "variant 1, fine";
break;
case DEFAULT:
case VARIANT_2:
upperBound = computeReachRewardsMaxUpperBoundVariant2(cleanedMDP, mdpRewards, target, unknown, inf);
method = "variant 2";
break;
case DSMPI:
throw new PrismNotSupportedException("Dijkstra Sweep MPI upper bound heuristic can not be used for Rmax");
}
if (method == null) {
throw new PrismException("Unknown upper bound heuristic");
}
mainLog.println("Upper bound for max expectation (" + method + "): " + upperBound);
return upperBound;
}
/**
* Compute upper bound for minimum expected reward, with the method specified in the settings.
* @param mdp the model
* @param mdpRewards the rewards
* @param target the target states
* @param unknown the states that are not target or infinity states
* @param inf the infinite states
* @return upper bound on Rmin=?[ F target ] for all unknown states
*/
double computeReachRewardsMinUpperBound(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
{
// inf and target states become trap states (with dropped choices)
BitSet trapStates = (BitSet) target.clone();
trapStates.or(inf);
MDP cleanedMDP = new MDPDroppedAllChoices(mdp, trapStates);
OptionsIntervalIteration iiOptions = OptionsIntervalIteration.from(this);
double upperBound = 0.0;
String method = null;
switch (iiOptions.getBoundMethod()) {
case DEFAULT:
case DSMPI:
upperBound = DijkstraSweepMPI.computeUpperBound(this, mdp, mdpRewards, target, unknown);
method = "Dijkstra Sweep MPI";
break;
case VARIANT_1_COARSE:
upperBound = computeReachRewardsMaxUpperBoundVariant1Coarse(cleanedMDP, mdpRewards, target, unknown, inf);
method = "using Rmax upper bound via variant 1, coarse";
break;
case VARIANT_1_FINE:
upperBound = computeReachRewardsMaxUpperBoundVariant1Fine(cleanedMDP, mdpRewards, target, unknown, inf);
method = "using Rmax upper bound via variant 1, fine";
break;
case VARIANT_2:
upperBound = computeReachRewardsMaxUpperBoundVariant2(cleanedMDP, mdpRewards, target, unknown, inf);
method = "using Rmax upper bound via variant 2";
break;
}
if (method == null) {
throw new PrismException("Unknown upper bound heuristic");
}
mainLog.println("Upper bound for min expectation (" + method + "): " + upperBound);
return upperBound;
}
/**
* Return true if the MDP is contracting for all states in the 'unknown'
* set, i.e., if Pmin=1( unknown U target) holds.
*/
private boolean isContracting(MDP mdp, BitSet unknown, BitSet target)
{
// compute Pmin=1( unknown U target )
BitSet pmin1 = prob1(mdp, unknown, target, true, null);
BitSet tmp = (BitSet) unknown.clone();
tmp.andNot(pmin1);
if (!tmp.isEmpty()) {
// unknown is not contained in pmin1, not contracting
return false;
}
return true;
}
/**
* Compute upper bound for maximum expected reward (variant 1, coarse),
* i.e., does not compute separate q_t / p_t per SCC.
* Uses Rs = S, i.e., does not take reachability into account.
* @param mdp the model
* @param mdpRewards the rewards
* @param target the target states
* @param unknown the states that are not target or infinity states
* @param inf the infinite states
* @return upper bound on Rmax=?[ F target ] for all states
*/
double computeReachRewardsMaxUpperBoundVariant1Coarse(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
{
double[] boundsOnExpectedVisits = new double[mdp.getNumStates()];
double[] maxRews = new double[mdp.getNumStates()];
int[] Ct = new int[mdp.getNumStates()];
StopWatch timer = new StopWatch(getLog());
timer.start("computing an upper bound for maximal expected reward");
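// Compute the SCC decomposition in topological order, including trivial SCCs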
SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, null);
BitSet trivial = new BitSet();
double q = 0;
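// For each SCC, record its size in Ct[s]; q is the maximal probability of staying inside an SCC
// for any choice that can leave it; singleton SCCs without a self-loop are marked as trivial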
for (int scc = 0, numSCCs = sccs.getNumSCCs(); scc < numSCCs; scc++) {
IntSet statesForSCC = sccs.getStatesForSCC(scc);
int cardinality = statesForSCC.cardinality();
PrimitiveIterator.OfInt itSCC = statesForSCC.iterator();
while (itSCC.hasNext()) {
int s = itSCC.nextInt();
Ct[s] = cardinality;
boolean hasSelfloop = false;
for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {
double probRemain = 0;
boolean allRemain = true; // all successors remain in the SCC?
for (Iterator<Entry<Integer, Double>> it = mdp.getTransitionsIterator(s, ch); it.hasNext(); ) {
Entry<Integer, Double> t = it.next();
if (statesForSCC.get(t.getKey())) {
probRemain += t.getValue();
hasSelfloop = true;
} else {
allRemain = false;
}
}
if (!allRemain) { // action in the set X
q = Math.max(q, probRemain);
}
}
if (cardinality == 1 && !hasSelfloop) {
trivial.set(s);
}
}
}
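// p is the minimal transition probability occurring anywhere in the MDP;
// maxRews[s] is the maximal one-step reward available in state s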
double p = 1;
for (int s = 0; s < mdp.getNumStates(); s++) {
double maxRew = 0;
for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {
for (Iterator<Entry<Integer, Double>> it = mdp.getTransitionsIterator(s, ch); it.hasNext(); ) {
Entry<Integer, Double> t = it.next();
p = Math.min(p, t.getValue());
double rew = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, ch);
maxRew = Math.max(maxRew, rew);
}
}
maxRews[s] = maxRew;
}
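// Bound expected visits: a trivial SCC state is visited at most once, otherwise at most
// 1 / (p^(|Ct|-1) * (1-q)); the upper bound sums visits times maximal reward over the unknown states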
double upperBound = 0;
for (int s = 0; s < mdp.getNumStates(); s++) {
if (target.get(s) || inf.get(s)) {
// inf or target states: not relevant, set visits to 0, ignore in summation
boundsOnExpectedVisits[s] = 0.0;
} else if (unknown.get(s)) {
if (trivial.get(s)) {
// s is a trivial SCC: seen at most once
boundsOnExpectedVisits[s] = 1.0;
} else {
boundsOnExpectedVisits[s] = 1 / (Math.pow(p, Ct[s]-1) * (1.0-q));
}
upperBound += boundsOnExpectedVisits[s] * maxRews[s];
}
}
if (OptionsIntervalIteration.from(this).isBoundComputationVerbose()) {
mainLog.println("Upper bound for max expectation computation (variant 1, coarse):");
mainLog.println("p = " + p);
mainLog.println("q = " + q);
mainLog.println("|Ct| = " + Arrays.toString(Ct));
mainLog.println("ζ* = " + Arrays.toString(boundsOnExpectedVisits));
mainLog.println("maxRews = " + Arrays.toString(maxRews));
}
timer.stop();
// mainLog.println("Upper bound for max expectation (variant 1, coarse): " + upperBound);
if (!Double.isFinite(upperBound)) {
throw new PrismException("Problem computing an upper bound for the expectation, did not get finite result");
}
return upperBound;
}
/**
* Compute upper bound for maximum expected reward (variant 1, fine),
* i.e., computes separate q_t / p_t per SCC.
* Uses Rs = S, i.e., does not take reachability into account.
* @param mdp the model
* @param mdpRewards the rewards
* @param target the target states
* @param unknown the states that are not target or infinity states
* @param inf the infinite states
* @return upper bound on Rmax=?[ F target ] for all states
*/
double computeReachRewardsMaxUpperBoundVariant1Fine(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
{
double[] boundsOnExpectedVisits = new double[mdp.getNumStates()];
double[] qt = new double[mdp.getNumStates()];
double[] pt = new double[mdp.getNumStates()];
double[] maxRews = new double[mdp.getNumStates()];
int[] Ct = new int[mdp.getNumStates()];
StopWatch timer = new StopWatch(getLog());
timer.start("computing an upper bound for maximal expected reward");
SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, null);
BitSet trivial = new BitSet();
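// As in the coarse variant, but q and p are computed per SCC and stored, for each state of the SCC, in qt and pt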
for (int scc = 0, numSCCs = sccs.getNumSCCs(); scc < numSCCs; scc++) {
IntSet statesForSCC = sccs.getStatesForSCC(scc);
double q = 0;
double p = 1;
int cardinality = statesForSCC.cardinality();
PrimitiveIterator.OfInt itSCC = statesForSCC.iterator();
while (itSCC.hasNext()) {
int s = itSCC.nextInt();
Ct[s] = cardinality;
boolean hasSelfloop = false;
for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {
double probRemain = 0;
boolean allRemain = true; // all successors remain in the SCC?
for (Iterator<Entry<Integer, Double>> it = mdp.getTransitionsIterator(s, ch); it.hasNext(); ) {
Entry<Integer, Double> t = it.next();
if (statesForSCC.get(t.getKey())) {
probRemain += t.getValue();
p = Math.min(p, t.getValue());
hasSelfloop = true;
} else {
allRemain = false;
}
}
if (!allRemain) { // action in the set Xt
q = Math.max(q, probRemain);
}
}
if (cardinality == 1 && !hasSelfloop) {
trivial.set(s);
}
}
for (int s : statesForSCC) {
qt[s] = q;
pt[s] = p;
}
}
for (int s = 0; s < mdp.getNumStates(); s++) {
double maxRew = 0;
for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {
double rew = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, ch);
maxRew = Math.max(maxRew, rew);
}
maxRews[s] = maxRew;
}
double upperBound = 0;
for (int s = 0; s < mdp.getNumStates(); s++) {
if (target.get(s) || inf.get(s)) {
// inf or target states: not relevant, set visits to 0, ignore in summation
boundsOnExpectedVisits[s] = 0.0;
} else if (unknown.get(s)) {
if (trivial.get(s)) {
// s is a trivial SCC: seen at most once
boundsOnExpectedVisits[s] = 1.0;
} else {
boundsOnExpectedVisits[s] = 1 / (Math.pow(pt[s], Ct[s]-1) * (1.0-qt[s]));
}
upperBound += boundsOnExpectedVisits[s] * maxRews[s];
}
}
timer.stop();
if (OptionsIntervalIteration.from(this).isBoundComputationVerbose()) {
mainLog.println("Upper bound for max expectation computation (variant 1, fine):");
mainLog.println("pt = " + Arrays.toString(pt));
mainLog.println("qt = " + Arrays.toString(qt));
mainLog.println("|Ct| = " + Arrays.toString(Ct));
mainLog.println("ζ* = " + Arrays.toString(boundsOnExpectedVisits));
mainLog.println("maxRews = " + Arrays.toString(maxRews));
}
// mainLog.println("Upper bound for max expectation (variant 1, fine): " + upperBound);
if (!Double.isFinite(upperBound)) {
throw new PrismException("Problem computing an upper bound for the expectation, did not get finite result");
}
return upperBound;
}
/**
* Compute upper bound for maximum expected reward (variant 2).
* Uses Rs = S, i.e., does not take reachability into account.
* @param mdp the model
* @param mdpRewards the rewards
* @param target the target states
* @param unknown the states that are not target or infinity states
* @param inf the infinity states
* @return upper bound on Rmax=?[ F target ] for all states
*/
double computeReachRewardsMaxUpperBoundVariant2(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
{
double[] dt = new double[mdp.getNumStates()];
double[] boundsOnExpectedVisits = new double[mdp.getNumStates()];
double[] maxRews = new double[mdp.getNumStates()];
StopWatch timer = new StopWatch(getLog());
timer.start("computing an upper bound for expected reward");
SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);
BitSet T = (BitSet) target.clone();
@SuppressWarnings("unused")
int i = 0;
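// Iteratively grow T (initially the target states): in each round, Si collects the states outside T
// all of whose choices reach T with positive probability; for each such state t, dt[t] is the minimal
// (over choices) weighted probability of moving into T, where a successor in T from the same SCC
// contributes dt[u] * prob and one from a different SCC contributes prob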
while (true) {
BitSet Si = new BitSet();
i++;
// TODO: might be inefficient, worst-case quadratic runtime...
for (PrimitiveIterator.OfInt it = IterableBitSet.getClearBits(T, mdp.getNumStates() - 1).iterator(); it.hasNext(); ) {
int s = it.nextInt();
// mainLog.println("Check " + s + " against " + T);
boolean allActionsReachT = true;
for (int choice = 0, choices = mdp.getNumChoices(s); choice < choices; choice++) {
if (!mdp.someSuccessorsInSet(s, choice, T)) {
allActionsReachT = false;
break;
}
}
if (allActionsReachT) {
Si.set(s);
}
}
if (Si.isEmpty()) {
break;
}
// mainLog.println("S" + i + " = " + Si);
// mainLog.println("T = " + T);
for (PrimitiveIterator.OfInt it = IterableBitSet.getSetBits(Si).iterator(); it.hasNext(); ) {
final int t = it.nextInt();
final int sccIndexForT = sccs.getSCCIndex(t);
double min = Double.POSITIVE_INFINITY;
for (int choice = 0, choices = mdp.getNumChoices(t); choice < choices; choice++) {
// mainLog.println("State " + t + ", choice = " + choice);
double d = mdp.sumOverTransitions(t, choice, (int __, int u, double prob) -> {
// mainLog.println("t = " + t + ", u = " + u + ", prob = " + prob);
if (!T.get(u))
return 0.0;
boolean inSameSCC = (sccs.getSCCIndex(u) == sccIndexForT);
double d_u_t = inSameSCC ? dt[u] : 1.0;
// mainLog.println("d_u_t = " + d_u_t);
return d_u_t * prob;
});
if (d < min) {
min = d;
}
}
dt[t] = min;
// mainLog.println("d["+t+"] = " + dt[t]);
}
T.or(Si);
}
for (int s = 0; s < mdp.getNumStates(); s++) {
double maxRew = 0;
for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {
double rew = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, ch);
maxRew = Math.max(maxRew, rew);
}
maxRews[s] = maxRew;
}
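// Expected visits of an unknown state s are bounded by 1/dt[s]; sum visits times maximal one-step reward to obtain the overall bound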
double upperBound = 0;
for (PrimitiveIterator.OfInt it = IterableBitSet.getSetBits(unknown).iterator(); it.hasNext();) {
int s = it.nextInt();
boundsOnExpectedVisits[s] = 1 / dt[s];
upperBound += boundsOnExpectedVisits[s] * maxRews[s];
}
timer.stop();
if (OptionsIntervalIteration.from(this).isBoundComputationVerbose()) {
mainLog.println("Upper bound for max expectation computation (variant 2):");
mainLog.println("d_t = " + Arrays.toString(dt));
mainLog.println("ζ* = " + Arrays.toString(boundsOnExpectedVisits));
}
// mainLog.println("Upper bound for expectation (variant 2): " + upperBound);
if (!Double.isFinite(upperBound)) {
throw new PrismException("Problem computing an upper bound for the expectation, did not get finite result");
}
return upperBound;
}
/**
* Compute expected instantaneous reward,
* i.e. compute the min/max expected reward of the states after {@code k} steps.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param k the number of steps
* @param min Min or max rewards (true=min, false=max)
*/
public ModelCheckerResult computeInstantaneousRewards(MDP mdp, MDPRewards mdpRewards, final int k, boolean min)
{
ModelCheckerResult res = null;
int i, n, iters;
double soln[], soln2[], tmpsoln[];
long timer;
// Store num states
n = mdp.getNumStates();
// Start backwards transient computation
timer = System.currentTimeMillis();
mainLog.println("\nStarting backwards instantaneous rewards computation...");
// Create solution vector(s)
soln = new double[n];
soln2 = new double[n];
// Initialise solution vectors.
for (i = 0; i < n; i++)
soln[i] = mdpRewards.getStateReward(i);
// Start iterations
for (iters = 0; iters < k; iters++) {
// Matrix-vector multiply
mdp.mvMultMinMax(soln, min, soln2, null, false, null);
// Swap vectors for next iter
tmpsoln = soln;
soln = soln2;
soln2 = tmpsoln;
}
// Finished backwards transient computation
timer = System.currentTimeMillis() - timer;
mainLog.print("Backwards transient instantaneous rewards computation");
mainLog.println(" took " + iters + " iters and " + timer / 1000.0 + " seconds.");
// Return results
res = new ModelCheckerResult();
res.soln = soln;
res.lastSoln = soln2;
res.numIters = iters;
res.timeTaken = timer / 1000.0;
res.timePre = 0.0;
return res;
}
/**
* Compute total expected rewards.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param min Min or max rewards (true=min, false=max)
*/
public ModelCheckerResult computeTotalRewards(MDP mdp, MDPRewards mdpRewards, boolean min) throws PrismException
{
if (min) {
throw new PrismNotSupportedException("Minimum total expected reward not supported in explicit engine");
} else {
// max. We don't know if there are positive ECs, so we can't skip precomputation
return computeTotalRewardsMax(mdp, mdpRewards, false);
}
}
/**
* Compute maximal total expected rewards.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param noPositiveECs if true, there are no positive ECs, i.e., all states have finite values (skip precomputation)
*/
public ModelCheckerResult computeTotalRewardsMax(MDP mdp, MDPRewards mdpRewards, boolean noPositiveECs) throws PrismException
{
ModelCheckerResult res = null;
int n;
long timer;
BitSet inf;
// Local copy of setting
MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;
// Switch to a supported method, if necessary
if (!(mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION || mdpSolnMethod == MDPSolnMethod.GAUSS_SEIDEL || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION)) {
mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL;
mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\"");
}
if (getDoIntervalIteration()) {
throw new PrismNotSupportedException("Interval iteration for total rewards is currently not supported");
}
// Start expected total reward
timer = System.currentTimeMillis();
mainLog.println("\nStarting total expected reward (max)...");
// Store num states
n = mdp.getNumStates();
long timerPre;
if (noPositiveECs) {
// no inf states
inf = new BitSet();
timerPre = 0;
} else {
mainLog.println("Precomputation: Find positive end components...");
timerPre = System.currentTimeMillis();
ECComputer ecs = ECComputer.createECComputer(this, mdp);
ecs.computeMECStates();
BitSet positiveECs = new BitSet();
for (BitSet ec : ecs.getMECStates()) {
// check if this MEC is positive
boolean positiveEC = false;
for (int state : new IterableStateSet(ec, n)) {
if (mdpRewards.getStateReward(state) > 0) {
// state with positive reward in this MEC
positiveEC = true;
break;
}
for (int choice = 0, numChoices = mdp.getNumChoices(state); choice < numChoices; choice++) {
if (mdpRewards.getTransitionReward(state, choice) > 0 &&
mdp.allSuccessorsInSet(state, choice, ec)) {
// choice from this state with positive reward back into this MEC
positiveEC = true;
break;
}
}
}
if (positiveEC) {
positiveECs.or(ec);
}
}
// inf = Pmax[ <> positiveECs ] > 0
// = ! (Pmax[ <> positiveECs ] = 0)
inf = prob0(mdp, null, positiveECs, false, null); // Pmax[ <> positiveECs ] = 0
inf.flip(0,n); // !(Pmax[ <> positive ECs ] = 0) = Pmax[ <> positiveECs ] > 0
timerPre = System.currentTimeMillis() - timerPre;
mainLog.println("Precomputation took " + timerPre / 1000.0 + " seconds, " + inf.cardinality() + " infinite states, " + (n - inf.cardinality()) + " states remaining.");
}
// Compute rewards
// do standard max reward calculation, but with empty target set
switch (mdpSolnMethod) {
case VALUE_ITERATION:
res = computeReachRewardsValIter(mdp, mdpRewards, new BitSet(), inf, false, null, null, null);
break;
case GAUSS_SEIDEL:
res = computeReachRewardsGaussSeidel(mdp, mdpRewards, new BitSet(), inf, false, null, null, null);
break;
case POLICY_ITERATION:
res = computeReachRewardsPolIter(mdp, mdpRewards, new BitSet(), inf, false, null);
break;
default:
throw new PrismException("Unknown MDP solution method " + mdpSolnMethod.fullName());
}
// Finished expected total reward
timer = System.currentTimeMillis() - timer;
mainLog.println("Expected total reward took " + timer / 1000.0 + " seconds.");
// Update time taken
res.timeTaken = timer / 1000.0;
res.timePre = timerPre / 1000.0;
// Return results
return res;
}
/**
* Compute expected reachability rewards.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param target Target states
* @param min Min or max rewards (true=min, false=max)
*/
public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min) throws PrismException
{
return computeReachRewards(mdp, mdpRewards, target, min, null, null);
}
/**
* Compute expected reachability rewards.
* i.e. compute the min/max reward accumulated to reach a state in {@code target}.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param target Target states
* @param min Min or max rewards (true=min, false=max)
* @param init Optionally, an initial solution vector (may be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
* Also, 'known' values cannot be passed for some solution methods, e.g. policy iteration.
*/
public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min, double init[], BitSet known)
throws PrismException
{
ModelCheckerResult res = null;
BitSet inf;
int n, numTarget, numInf;
long timer, timerProb1;
int strat[] = null;
// Local copy of setting
MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;
// Switch to a supported method, if necessary
if (!(mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION || mdpSolnMethod == MDPSolnMethod.GAUSS_SEIDEL || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION)) {
mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL;
mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\"");
}
// Check for some unsupported combinations
if (mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) {
if (known != null) {
throw new PrismException("Policy iteration methods cannot be passed 'known' values for some states");
}
}
if (doIntervalIteration) {
if (mdpSolnMethod != MDPSolnMethod.VALUE_ITERATION && mdpSolnMethod != MDPSolnMethod.GAUSS_SEIDEL) {
throw new PrismNotSupportedException("Currently, explicit engine only supports interval iteration with value iteration or Gauss-Seidel for MDPs");
}
}
// Start expected reachability
timer = System.currentTimeMillis();
mainLog.println("\nStarting expected reachability (" + (min ? "min" : "max") + ")...");
// Check for deadlocks in non-target state (because breaks e.g. prob1)
mdp.checkForDeadlocks(target);
// Store num states
n = mdp.getNumStates();
// Optimise by enlarging target set (if more info is available)
if (init != null && known != null && !known.isEmpty()) {
BitSet targetNew = (BitSet) target.clone();
for (int i : new IterableBitSet(known)) {
if (init[i] == 1.0) {
targetNew.set(i);
}
}
target = targetNew;
}
// If required, export info about target states
if (getExportTarget()) {
BitSet bsInit = new BitSet(n);
for (int i = 0; i < n; i++) {
bsInit.set(i, mdp.isInitialState(i));
}
List<BitSet> labels = Arrays.asList(bsInit, target);
List<String> labelNames = Arrays.asList("init", "target");
mainLog.println("\nExporting target states info to file \"" + getExportTargetFilename() + "\"...");
exportLabels(mdp, labels, labelNames, Prism.EXPORT_PLAIN, new PrismFileLog(getExportTargetFilename()));
}
// If required, create/initialise strategy storage
// Set choices to -1, denoting unknown
// (except for target states, which are -2, denoting arbitrary)
if (genStrat || exportAdv || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) {
strat = new int[n];
for (int i = 0; i < n; i++) {
strat[i] = target.get(i) ? -2 : -1;
}
}
// Precomputation (not optional)
timerProb1 = System.currentTimeMillis();
inf = prob1(mdp, null, target, !min, strat);
inf.flip(0, n);
timerProb1 = System.currentTimeMillis() - timerProb1;
// Print results of precomputation
numTarget = target.cardinality();
numInf = inf.cardinality();
mainLog.println("target=" + numTarget + ", inf=" + numInf + ", rest=" + (n - (numTarget + numInf)));
// If required, generate strategy for "inf" states.
if (genStrat || exportAdv || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) {
if (min) {
// If min reward is infinite, all choices give infinity
// So the choice can be arbitrary, denoted by -2;
for (int i = inf.nextSetBit(0); i >= 0; i = inf.nextSetBit(i + 1)) {
strat[i] = -2;
}
} else {
// If max reward is infinite, there is at least one choice giving infinity.
// So we pick, for all "inf" states, the first choice for which some transition stays in "inf".
for (int i = inf.nextSetBit(0); i >= 0; i = inf.nextSetBit(i + 1)) {
int numChoices = mdp.getNumChoices(i);
for (int k = 0; k < numChoices; k++) {
if (mdp.someSuccessorsInSet(i, k, inf)) {
strat[i] = k;
break;
}
}
}
}
}
ZeroRewardECQuotient quotient = null;
boolean doZeroMECCheckForMin = true;
if (min & doZeroMECCheckForMin) {
StopWatch zeroMECTimer = new StopWatch(mainLog);
zeroMECTimer.start("checking for zero-reward ECs");
mainLog.println("For Rmin, checking for zero-reward ECs...");
BitSet unknown = (BitSet) inf.clone();
unknown.flip(0, mdp.getNumStates());
unknown.andNot(target);
quotient = ZeroRewardECQuotient.getQuotient(this, mdp, unknown, mdpRewards);
if (quotient == null) {
zeroMECTimer.stop("no zero-reward ECs found, proceeding normally");
} else {
zeroMECTimer.stop("built quotient MDP with " + quotient.getNumberOfZeroRewardMECs() + " zero-reward MECs");
if (strat != null) {
throw new PrismException("Constructing a strategy for Rmin in the presence of zero-reward ECs is currently not supported");
}
}
}
if (quotient != null) {
BitSet newInfStates = (BitSet)inf.clone();
newInfStates.or(quotient.getNonRepresentativeStates());
int quotientModelStates = quotient.getModel().getNumStates() - newInfStates.cardinality();
mainLog.println("Computing Rmin in zero-reward EC quotient model (" + quotientModelStates + " relevant states)...");
res = computeReachRewardsNumeric(quotient.getModel(), quotient.getRewards(), mdpSolnMethod, target, newInfStates, min, init, known, strat);
quotient.mapResults(res.soln);
} else {
res = computeReachRewardsNumeric(mdp, mdpRewards, mdpSolnMethod, target, inf, min, init, known, strat);
}
// Store strategy
if (genStrat) {
res.strat = new MDStrategyArray(mdp, strat);
}
// Export adversary
if (exportAdv) {
// Prune strategy, if needed
if (getRestrictStratToReach()) {
restrictStrategyToReachableStates(mdp, strat);
}
// Export
PrismLog out = new PrismFileLog(exportAdvFilename);
new DTMCFromMDPMemorylessAdversary(mdp, strat).exportToPrismExplicitTra(out);
out.close();
}
// Finished expected reachability
timer = System.currentTimeMillis() - timer;
mainLog.println("Expected reachability took " + timer / 1000.0 + " seconds.");
// Update time taken
res.timeTaken = timer / 1000.0;
res.timePre = timerProb1 / 1000.0;
return res;
}
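/**
 * Compute expected reachability rewards using the specified solution method,
 * dispatching to value iteration, Gauss-Seidel, interval iteration or policy iteration as appropriate.
 * Parameters are as for {@code computeReachRewards}, plus (optional) storage for strategy choice indices.
 */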
protected ModelCheckerResult computeReachRewardsNumeric(MDP mdp, MDPRewards mdpRewards, MDPSolnMethod method, BitSet target, BitSet inf, boolean min, double init[], BitSet known, int strat[]) throws PrismException
{
ModelCheckerResult res = null;
IterationMethod iterationMethod = null;
switch (method) {
case VALUE_ITERATION:
iterationMethod = new IterationMethodPower(termCrit == TermCrit.ABSOLUTE, termCritParam);
break;
case GAUSS_SEIDEL:
iterationMethod = new IterationMethodGS(termCrit == TermCrit.ABSOLUTE, termCritParam, false);
break;
case POLICY_ITERATION:
if (doIntervalIteration) {
throw new PrismNotSupportedException("Interval iteration currently not supported for policy iteration");
}
res = computeReachRewardsPolIter(mdp, mdpRewards, target, inf, min, strat);
break;
default:
throw new PrismException("Unknown MDP solution method " + method.fullName());
}
if (res == null) { // not yet computed, use iterationMethod
if (!doIntervalIteration) {
res = doValueIterationReachRewards(mdp, mdpRewards, iterationMethod, target, inf, min, init, known, getDoTopologicalValueIteration(), strat);
} else {
res = doIntervalIterationReachRewards(mdp, mdpRewards, iterationMethod, target, inf, min, init, known, getDoTopologicalValueIteration(), strat);
}
}
return res;
}
/**
* Compute expected reachability rewards using value iteration.
* Optionally, store optimal (memoryless) strategy info.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param target Target states
* @param inf States for which reward is infinite
* @param min Min or max rewards (true=min, false=max)
* @param init Optionally, an initial solution vector (will be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[], BitSet known, int strat[])
throws PrismException
{
IterationMethodPower iterationMethod = new IterationMethodPower(termCrit == TermCrit.ABSOLUTE, termCritParam);
return doValueIterationReachRewards(mdp, mdpRewards, iterationMethod, target, inf, min, init, known, false, strat);
}
/**
* Compute expected reachability rewards using value iteration.
* Optionally, store optimal (memoryless) strategy info.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param iterationMethod The iteration method to use (e.g. power method or Gauss-Seidel)
* @param target Target states
* @param inf States for which reward is infinite
* @param min Min or max rewards (true=min, false=max)
* @param init Optionally, an initial solution vector (will be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* @param topological Do topological value iteration?
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
protected ModelCheckerResult doValueIterationReachRewards(MDP mdp, MDPRewards mdpRewards, IterationMethod iterationMethod, BitSet target, BitSet inf, boolean min, double init[], BitSet known, boolean topological, int strat[])
throws PrismException
{
BitSet unknown;
int i, n;
long timer;
// Start value iteration
timer = System.currentTimeMillis();
String description = (min ? "min" : "max") + (topological ? ", topological" : "" ) + ", with " + iterationMethod.getDescriptionShort();
mainLog.println("Starting value iteration (" + description + ")...");
ExportIterations iterationsExport = null;
if (settings.getBoolean(PrismSettings.PRISM_EXPORT_ITERATIONS)) {
iterationsExport = new ExportIterations("Explicit MDP ReachRewards value iteration (" + description +")");
mainLog.println("Exporting iterations to " + iterationsExport.getFileName());
}
// Store num states
n = mdp.getNumStates();
// Initialise solution vectors. Use (where available) the following in order of preference:
// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) passed in initial value; (4) 0.0
if (init != null) {
if (known != null) {
for (i = 0; i < n; i++)
init[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
} else {
for (i = 0; i < n; i++)
init[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
}
} else {
init = new double[n];
for (i = 0; i < n; i++)
init[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;
}
// Determine set of states actually need to compute values for
unknown = new BitSet();
unknown.set(0, n);
unknown.andNot(target);
unknown.andNot(inf);
if (known != null)
unknown.andNot(known);
if (iterationsExport != null)
iterationsExport.exportVector(init, 0);
IterationMethod.IterationValIter forMvMultRewMinMax = iterationMethod.forMvMultRewMinMax(mdp, mdpRewards, min, strat);
forMvMultRewMinMax.init(init);
IntSet unknownStates = IntSet.asIntSet(unknown);
if (topological) {
// Compute SCCInfo, including trivial SCCs in the subgraph obtained when only considering
// states in unknown
SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);
IterationMethod.SingletonSCCSolver singletonSCCSolver = (int s, double[] soln) -> {
soln[s] = mdp.mvMultRewJacMinMaxSingle(s, soln, mdpRewards, min, strat);
};
// run the actual value iteration
return iterationMethod.doTopologicalValueIteration(this, description, sccs, forMvMultRewMinMax, singletonSCCSolver, timer, iterationsExport);
} else {
// run the actual value iteration
return iterationMethod.doValueIteration(this, description, forMvMultRewMinMax, unknownStates, timer, iterationsExport);
}
}
/**
* Compute expected reachability rewards using Gauss-Seidel (including Jacobi-style updates).
* Optionally, store optimal (memoryless) strategy info.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param target Target states
* @param inf States for which reward is infinite
* @param min Min or max rewards (true=min, false=max)
* @param init Optionally, an initial solution vector (will be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
protected ModelCheckerResult computeReachRewardsGaussSeidel(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[],
BitSet known, int strat[]) throws PrismException
{
IterationMethodGS iterationMethod = new IterationMethodGS(termCrit == TermCrit.ABSOLUTE, termCritParam, false);
return doValueIterationReachRewards(mdp, mdpRewards, iterationMethod, target, inf, min, init, known, false, strat);
}
/**
* Compute expected reachability rewards using interval iteration.
* Optionally, store optimal (memoryless) strategy info.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param target Target states
* @param inf States for which reward is infinite
* @param min Min or max rewards (true=min, false=max)
* @param init Optionally, an initial solution vector (will be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* @param topological Do topological interval iteration?
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
protected ModelCheckerResult doIntervalIterationReachRewards(MDP mdp, MDPRewards mdpRewards, IterationMethod iterationMethod, BitSet target, BitSet inf, boolean min, double init[], BitSet known, boolean topological, int strat[])
throws PrismException
{
BitSet unknown;
int i, n;
double initBelow[], initAbove[];
long timer;
// Store num states
n = mdp.getNumStates();
// Determine set of states actually need to compute values for
unknown = new BitSet();
unknown.set(0, n);
unknown.andNot(target);
unknown.andNot(inf);
if (known != null)
unknown.andNot(known);
OptionsIntervalIteration iiOptions = OptionsIntervalIteration.from(this);
double upperBound;
if (iiOptions.hasManualUpperBound()) {
upperBound = iiOptions.getManualUpperBound();
getLog().printWarning("Upper bound for interval iteration manually set to " + upperBound);
} else {
if (min) {
upperBound = computeReachRewardsMinUpperBound(mdp, mdpRewards, target, unknown, inf);
} else {
upperBound = computeReachRewardsMaxUpperBound(mdp, mdpRewards, target, unknown, inf);
}
}
double lowerBound;
if (iiOptions.hasManualLowerBound()) {
lowerBound = iiOptions.getManualLowerBound();
getLog().printWarning("Lower bound for interval iteration manually set to " + lowerBound);
} else {
lowerBound = 0.0;
}
if (min) {
if (!isContracting(mdp, unknown, target)) {
throw new PrismNotSupportedException("Interval iteration for Rmin and non-contracting MDP currently not supported");
} else {
mainLog.println("Relevant sub-MDP is contracting, proceed...");
}
}
// Start value iteration
timer = System.currentTimeMillis();
String description = (min ? "min" : "max") + (topological ? ", topological" : "") + ", with " + iterationMethod.getDescriptionShort();
mainLog.println("Starting interval iteration (" + description + ")...");
ExportIterations iterationsExport = null;
if (settings.getBoolean(PrismSettings.PRISM_EXPORT_ITERATIONS)) {
iterationsExport = new ExportIterations("Explicit MDP ReachRewards interval iteration (" + description + ")");
mainLog.println("Exporting iterations to " + iterationsExport.getFileName());
}
// Create initial solution vector(s)
initBelow = (init == null) ? new double[n] : init;
initAbove = new double[n];
// Initialise solution vector from below. Use (where available) the following in order of preference:
// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) lowerBound
if (init != null && known != null) {
for (i = 0; i < n; i++)
initBelow[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : lowerBound;
} else {
for (i = 0; i < n; i++)
initBelow[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : lowerBound;
}
// Initialise solution vector from above. Use (where available) the following in order of preference:
// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) upperBound
if (init != null && known != null) {
for (i = 0; i < n; i++)
initAbove[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : upperBound;
} else {
for (i = 0; i < n; i++)
initAbove[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : upperBound;
}
if (iterationsExport != null) {
iterationsExport.exportVector(initBelow, 0);
iterationsExport.exportVector(initAbove, 1);
}
final boolean enforceMonotonicFromBelow = iiOptions.isEnforceMonotonicityFromBelow();
final boolean enforceMonotonicFromAbove = iiOptions.isEnforceMonotonicityFromAbove();
final boolean checkMonotonic = iiOptions.isCheckMonotonicity();
if (!enforceMonotonicFromAbove) {
getLog().println("Note: Interval iteration is configured to not enforce monotonicity from above.");
}
if (!enforceMonotonicFromBelow) {
getLog().println("Note: Interval iteration is configured to not enforce monotonicity from below.");
}
IterationMethod.IterationIntervalIter below = iterationMethod.forMvMultRewMinMaxInterval(mdp, mdpRewards, min, strat, true, enforceMonotonicFromBelow, checkMonotonic);
IterationMethod.IterationIntervalIter above = iterationMethod.forMvMultRewMinMaxInterval(mdp, mdpRewards, min, strat, false, enforceMonotonicFromAbove, checkMonotonic);
below.init(initBelow);
above.init(initAbove);
IntSet unknownStates = IntSet.asIntSet(unknown);
ModelCheckerResult rv;
if (topological) {
// Compute SCCInfo, including trivial SCCs in the subgraph obtained when only considering
// states in unknown
SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);
IterationMethod.SingletonSCCSolver singletonSCCSolver = (int s, double[] soln) -> {
soln[s] = mdp.mvMultRewJacMinMaxSingle(s, soln, mdpRewards, min, strat);
};
// run the actual value iteration
rv = iterationMethod.doTopologicalIntervalIteration(this, description, sccs, below, above, singletonSCCSolver, timer, iterationsExport);
} else {
// run the actual value iteration
rv = iterationMethod.doIntervalIteration(this, description, below, above, unknownStates, timer, iterationsExport);
}
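// Report the maximum finite value reached in the solution vector over the unknown states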
double max_v = PrismUtils.findMaxFinite(rv.soln, unknownStates.iterator());
if (max_v != Double.NEGATIVE_INFINITY) {
mainLog.println("Maximum finite value in solution vector at end of interval iteration: " + max_v);
}
return rv;
}
/**
* Compute expected reachability rewards using policy iteration.
* The array {@code strat} is used both to pass in the initial strategy for policy iteration,
* and as storage for the resulting optimal strategy (if needed).
* Passing in an initial strategy is required when some states have infinite reward,
* to avoid the possibility of policy iteration getting stuck on an infinite-value strategy.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param target Target states
* @param inf States for which reward is infinite
* @param min Min or max rewards (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
protected ModelCheckerResult computeReachRewardsPolIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, int strat[])
throws PrismException
{
ModelCheckerResult res;
int i, n, iters, totalIters;
double soln[], soln2[];
boolean done;
long timer;
DTMCModelChecker mcDTMC;
DTMC dtmc;
MCRewards mcRewards;
// Re-use solution to solve each new policy (strategy)?
boolean reUseSoln = true;
// Start policy iteration
timer = System.currentTimeMillis();
mainLog.println("Starting policy iteration (" + (min ? "min" : "max") + ")...");
// Create a DTMC model checker (for solving policies)
mcDTMC = new DTMCModelChecker(this);
mcDTMC.inheritSettings(this);
mcDTMC.setLog(new PrismDevNullLog());
// Store num states
n = mdp.getNumStates();
// Create solution vector(s)
soln = new double[n];
soln2 = new double[n];
// Initialise solution vectors.
for (i = 0; i < n; i++)
soln[i] = soln2[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;
// If not passed in, create new storage for strategy and initialise
// Initial strategy just picks first choice (0) everywhere
if (strat == null) {
strat = new int[n];
for (i = 0; i < n; i++)
strat[i] = 0;
}
// Start iterations
iters = totalIters = 0;
done = false;
while (!done && iters < maxIters) {
iters++;
// Solve induced DTMC for strategy
dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
mcRewards = new MCRewardsFromMDPRewards(mdpRewards, strat);
res = mcDTMC.computeReachRewardsValIter(dtmc, mcRewards, target, inf, reUseSoln ? soln : null, null);
soln = res.soln;
totalIters += res.numIters;
// Check if optimal, improve non-optimal choices
mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, null);
done = true;
for (i = 0; i < n; i++) {
// Don't look at target/inf states - we may not have strategy info for them,
// so they might appear non-optimal
if (target.get(i) || inf.get(i))
continue;
if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
done = false;
List<Integer> opt = mdp.mvMultRewMinMaxSingleChoices(i, soln, mdpRewards, min, soln2[i]);
// Only update strategy if the current choice is not already optimal
if (!opt.contains(strat[i]))
strat[i] = opt.get(0);
}
}
}
// Finished policy iteration
timer = System.currentTimeMillis() - timer;
mainLog.print("Policy iteration");
mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");
// Return results
res = new ModelCheckerResult();
res.soln = soln;
res.numIters = totalIters;
res.timeTaken = timer / 1000.0;
return res;
}
/**
* Construct strategy information for min/max expected reachability.
* (More precisely, list of indices of choices resulting in min/max.)
* (Note: indices are guaranteed to be sorted in ascending order.)
* @param mdp The MDP
* @param mdpRewards The rewards
* @param state The state to generate strategy info for
* @param target The set of target states to reach
* @param min Min or max rewards (true=min, false=max)
* @param lastSoln Vector of values from which to recompute in one iteration
*/
public List<Integer> expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException
{
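// As for probabilities: recompute the optimal reward value for this state from lastSoln and return all choices attaining it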
double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, mdpRewards, min, null);
return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val);
}
/**
* Restrict a (memoryless) strategy for an MDP, stored as an integer array of choice indices,
* to the states of the MDP that are reachable under that strategy.
* @param mdp The MDP
* @param strat The strategy
*/
public void restrictStrategyToReachableStates(MDP mdp, int strat[])
{
BitSet restrict = new BitSet();
BitSet explore = new BitSet();
// Get initial states
for (int is : mdp.getInitialStates()) {
restrict.set(is);
explore.set(is);
}
// Compute reachable states (store in 'restrict')
boolean foundMore = true;
while (foundMore) {
foundMore = false;
for (int s = explore.nextSetBit(0); s >= 0; s = explore.nextSetBit(s + 1)) {
explore.set(s, false);
if (strat[s] >= 0) {
Iterator<Map.Entry<Integer, Double>> iter = mdp.getTransitionsIterator(s, strat[s]);
while (iter.hasNext()) {
Map.Entry<Integer, Double> e = iter.next();
int dest = e.getKey();
if (!restrict.get(dest)) {
foundMore = true;
restrict.set(dest);
explore.set(dest);
}
}
}
}
}
// Set strategy choice for non-reachable states to -3 (denoting unreachable)
int n = mdp.getNumStates();
for (int s = restrict.nextClearBit(0); s < n; s = restrict.nextClearBit(s + 1)) {
strat[s] = -3;
}
}
/**
* Compute the end component quotient (for use with PMax),
* each maximal end component is collapsed to a single state,
* likewise the yes and no regions, respectively.
*/
private MDPEquiv maxQuotient(MDP mdp, BitSet yes, BitSet no) throws PrismException
{
BitSet maybe = new BitSet();
maybe.set(0, mdp.getNumStates());
maybe.andNot(yes);
maybe.andNot(no);
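// Compute the MECs among the 'maybe' states; the yes and no regions each form one further equivalence class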
ECComputer ec = ECComputer.createECComputer(this, mdp);
ec.computeMECStates(maybe);
List<BitSet> mecs = ec.getMECStates();
mecs.add(yes);
mecs.add(no);
EquivalenceRelationInteger eq = new EquivalenceRelationInteger(mecs);
BasicModelTransformation<MDP, MDPEquiv> quotientTransform = MDPEquiv.transformDroppingLoops(mdp, eq);
MDPEquiv quotient = quotientTransform.getTransformedModel();
//mdp.exportToDotFile("original.dot");
//quotient.exportToDotFile("maxQuotient.dot");
int realStates = quotient.getNumStates() - quotient.getNonRepresentativeStates().cardinality();
mainLog.println("Max-Quotient MDP: " + realStates + " equivalence classes / non-trap states.");
return quotient;
}
/**
* Simple test program.
*/
public static void main(String args[])
{
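// Expected arguments: transitions file (PRISM explicit format), labels file, target label name,
// then optional switches -min / -max / -nopre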
MDPModelChecker mc;
MDPSimple mdp;
ModelCheckerResult res;
BitSet init, target;
Map<String, BitSet> labels;
boolean min = true;
try {
mc = new MDPModelChecker(null);
mdp = new MDPSimple();
mdp.buildFromPrismExplicit(args[0]);
mdp.addInitialState(0);
//System.out.println(mdp);
labels = StateModelChecker.loadLabelsFile(args[1]);
//System.out.println(labels);
init = labels.get("init");
target = labels.get(args[2]);
if (target == null)
throw new PrismException("Unknown label \"" + args[2] + "\"");
for (int i = 3; i < args.length; i++) {
if (args[i].equals("-min"))
min = true;
else if (args[i].equals("-max"))
min = false;
else if (args[i].equals("-nopre"))
mc.setPrecomp(false);
}
res = mc.computeReachProbs(mdp, target, min);
System.out.println(res.soln[init.nextSetBit(0)]);
} catch (PrismException e) {
System.out.println(e);
}
}
}