You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1683 lines
56 KiB
1683 lines
56 KiB
//==============================================================================
|
|
//
|
|
// Copyright (c) 2002-
|
|
// Authors:
|
|
// * Dave Parker <david.parker@comlab.ox.ac.uk> (University of Oxford)
|
|
//
|
|
//------------------------------------------------------------------------------
|
|
//
|
|
// This file is part of PRISM.
|
|
//
|
|
// PRISM is free software; you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation; either version 2 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// PRISM is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU General Public License
|
|
// along with PRISM; if not, write to the Free Software Foundation,
|
|
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
//
|
|
//==============================================================================
|
|
|
|
package explicit;
|
|
|
|
import java.util.BitSet;
|
|
import java.util.Iterator;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Vector;
|
|
|
|
import parser.ast.Expression;
|
|
import prism.DRA;
|
|
import prism.Pair;
|
|
import prism.PrismComponent;
|
|
import prism.PrismDevNullLog;
|
|
import prism.PrismException;
|
|
import prism.PrismFileLog;
|
|
import prism.PrismLog;
|
|
import prism.PrismUtils;
|
|
import strat.MDStrategyArray;
|
|
import explicit.rewards.MCRewards;
|
|
import explicit.rewards.MCRewardsFromMDPRewards;
|
|
import explicit.rewards.MDPRewards;
|
|
|
|
/**
|
|
* Explicit-state model checker for Markov decision processes (MDPs).
|
|
*/
|
|
public class MDPModelChecker extends ProbModelChecker
|
|
{
|
|
/**
|
|
* Create a new MDPModelChecker, inherit basic state from parent (unless null).
|
|
*/
|
|
public MDPModelChecker(PrismComponent parent) throws PrismException
|
|
{
|
|
super(parent);
|
|
}
|
|
|
|
// Model checking functions
|
|
|
|
@Override
|
|
protected StateValues checkProbPathFormulaLTL(Model model, Expression expr, boolean qual, MinMax minMax, BitSet statesOfInterest) throws PrismException
|
|
{
|
|
LTLModelChecker mcLtl;
|
|
StateValues probsProduct, probs;
|
|
Expression ltl;
|
|
DRA<BitSet> dra;
|
|
NondetModel modelProduct;
|
|
MDPModelChecker mcProduct;
|
|
long time;
|
|
|
|
// Can't do LTL with time-bounded variants of the temporal operators
|
|
if (Expression.containsTemporalTimeBounds(expr)) {
|
|
throw new PrismException("Time-bounded operators not supported in LTL: " + expr);
|
|
}
|
|
|
|
// For LTL model checking routines
|
|
mcLtl = new LTLModelChecker(this);
|
|
|
|
// Model check maximal state formulas
|
|
Vector<BitSet> labelBS = new Vector<BitSet>();
|
|
ltl = mcLtl.checkMaximalStateFormulas(this, model, expr.deepCopy(), labelBS);
|
|
|
|
// Convert LTL formula to deterministic Rabin automaton (DRA)
|
|
// For min probabilities, need to negate the formula
|
|
// (add parentheses to allow re-parsing if required)
|
|
if (minMax.isMin()) {
|
|
ltl = Expression.Not(Expression.Parenth(ltl));
|
|
}
|
|
mainLog.println("\nBuilding deterministic Rabin automaton (for " + ltl + ")...");
|
|
time = System.currentTimeMillis();
|
|
dra = LTLModelChecker.convertLTLFormulaToDRA(ltl);
|
|
int draSize = dra.size();
|
|
mainLog.println("DRA has " + dra.size() + " states, " + dra.getNumAcceptancePairs() + " pairs.");
|
|
time = System.currentTimeMillis() - time;
|
|
mainLog.println("Time for Rabin translation: " + time / 1000.0 + " seconds.");
|
|
// If required, export DRA
|
|
if (settings.getExportPropAut()) {
|
|
mainLog.println("Exporting DRA to file \"" + settings.getExportPropAutFilename() + "\"...");
|
|
PrismLog out = new PrismFileLog(settings.getExportPropAutFilename());
|
|
out.println(dra);
|
|
out.close();
|
|
//dra.printDot(new java.io.PrintStream("dra.dot"));
|
|
}
|
|
|
|
// Build product of MDP and automaton
|
|
mainLog.println("\nConstructing MDP-DRA product...");
|
|
Pair<NondetModel, int[]> pair = mcLtl.constructProductMDP(dra, (MDP) model, labelBS, statesOfInterest);
|
|
modelProduct = pair.first;
|
|
int invMap[] = pair.second;
|
|
|
|
// Find accepting MECs + compute reachability probabilities
|
|
mainLog.println("\nFinding accepting MECs...");
|
|
BitSet acceptingMECs = mcLtl.findAcceptingECStatesForRabin(dra, modelProduct, invMap);
|
|
mainLog.println("\nComputing reachability probabilities...");
|
|
mcProduct = new MDPModelChecker(this);
|
|
mcProduct.inheritSettings(this);
|
|
probsProduct = StateValues.createFromDoubleArray(mcProduct.computeReachProbs((MDP) modelProduct, acceptingMECs, false).soln, modelProduct);
|
|
|
|
// Subtract from 1 if we're model checking a negated formula for regular Pmin
|
|
if (minMax.isMin()) {
|
|
probsProduct.timesConstant(-1.0);
|
|
probsProduct.plusConstant(1.0);
|
|
}
|
|
|
|
// Mapping probabilities in the original model
|
|
double[] probsProductDbl = probsProduct.getDoubleArray();
|
|
double[] probsDbl = new double[model.getNumStates()];
|
|
|
|
// Get the probabilities for the original model by taking the initial states
|
|
// of the product and projecting back to the states of the original model
|
|
for (int i : modelProduct.getInitialStates()) {
|
|
int s = invMap[i] / draSize;
|
|
probsDbl[s] = probsProductDbl[i];
|
|
}
|
|
|
|
probs = StateValues.createFromDoubleArray(probsDbl, model);
|
|
probsProduct.clear();
|
|
|
|
return probs;
|
|
}
|
|
|
|
// Numerical computation functions
|
|
|
|
/**
|
|
* Compute next=state probabilities.
|
|
* i.e. compute the probability of being in a state in {@code target} in the next step.
|
|
* @param mdp The MDP
|
|
* @param target Target states
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
*/
|
|
public ModelCheckerResult computeNextProbs(MDP mdp, BitSet target, boolean min) throws PrismException
|
|
{
|
|
ModelCheckerResult res = null;
|
|
int n;
|
|
double soln[], soln2[];
|
|
long timer;
|
|
|
|
timer = System.currentTimeMillis();
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create/initialise solution vector(s)
|
|
soln = Utils.bitsetToDoubleArray(target, n);
|
|
soln2 = new double[n];
|
|
|
|
// Next-step probabilities
|
|
mdp.mvMultMinMax(soln, min, soln2, null, false, null);
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln2;
|
|
res.numIters = 1;
|
|
res.timeTaken = timer / 1000.0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Given a value vector x, compute the probability:
|
|
* v(s) = min/max sched [ Sum_s' P_sched(s,s')*x(s') ] for s labeled with a,
|
|
* v(s) = 0 for s not labeled with a.
|
|
*
|
|
* Clears the StateValues object x.
|
|
*
|
|
* @param tr the transition matrix
|
|
* @param a the set of states labeled with a
|
|
* @param x the value vector
|
|
* @param min compute min instead of max
|
|
*/
|
|
public double[] computeRestrictedNext(MDP mdp, BitSet a, double[] x, boolean min)
|
|
{
|
|
int n;
|
|
double soln[];
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// initialized to 0.0
|
|
soln = new double[n];
|
|
|
|
// Next-step probabilities multiplication
|
|
// restricted to a states
|
|
mdp.mvMultMinMax(x, min, soln, a, false, null);
|
|
|
|
return soln;
|
|
}
|
|
|
|
/**
|
|
* Compute reachability probabilities.
|
|
* i.e. compute the min/max probability of reaching a state in {@code target}.
|
|
* @param mdp The MDP
|
|
* @param target Target states
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
*/
|
|
public ModelCheckerResult computeReachProbs(MDP mdp, BitSet target, boolean min) throws PrismException
|
|
{
|
|
return computeReachProbs(mdp, null, target, min, null, null);
|
|
}
|
|
|
|
/**
|
|
* Compute until probabilities.
|
|
* i.e. compute the min/max probability of reaching a state in {@code target},
|
|
* while remaining in those in @{code remain}.
|
|
* @param mdp The MDP
|
|
* @param remain Remain in these states (optional: null means "all")
|
|
* @param target Target states
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
*/
|
|
public ModelCheckerResult computeUntilProbs(MDP mdp, BitSet remain, BitSet target, boolean min) throws PrismException
|
|
{
|
|
return computeReachProbs(mdp, remain, target, min, null, null);
|
|
}
|
|
|
|
/**
|
|
* Compute reachability/until probabilities.
|
|
* i.e. compute the min/max probability of reaching a state in {@code target},
|
|
* while remaining in those in @{code remain}.
|
|
* @param mdp The MDP
|
|
* @param remain Remain in these states (optional: null means "all")
|
|
* @param target Target states
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
* @param init Optionally, an initial solution vector (may be overwritten)
|
|
* @param known Optionally, a set of states for which the exact answer is known
|
|
* Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values).
|
|
* Also, 'known' values cannot be passed for some solution methods, e.g. policy iteration.
|
|
*/
|
|
public ModelCheckerResult computeReachProbs(MDP mdp, BitSet remain, BitSet target, boolean min, double init[], BitSet known) throws PrismException
|
|
{
|
|
ModelCheckerResult res = null;
|
|
BitSet targetOrig, no, yes;
|
|
int i, n, numYes, numNo;
|
|
long timer, timerProb0, timerProb1;
|
|
int strat[] = null;
|
|
// Local copy of setting
|
|
MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;
|
|
|
|
// Switch to a supported method, if necessary
|
|
if (mdpSolnMethod == MDPSolnMethod.LINEAR_PROGRAMMING) {
|
|
mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL;
|
|
mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\"");
|
|
}
|
|
|
|
// Check for some unsupported combinations
|
|
if (mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION && valIterDir == ValIterDir.ABOVE) {
|
|
if (!(precomp && prob0))
|
|
throw new PrismException("Precomputation (Prob0) must be enabled for value iteration from above");
|
|
if (!min)
|
|
throw new PrismException("Value iteration from above only works for minimum probabilities");
|
|
}
|
|
if (mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION || mdpSolnMethod == MDPSolnMethod.MODIFIED_POLICY_ITERATION) {
|
|
if (known != null) {
|
|
throw new PrismException("Policy iteration methods cannot be passed 'known' values for some states");
|
|
}
|
|
}
|
|
|
|
// Start probabilistic reachability
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("\nStarting probabilistic reachability (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Check for deadlocks in non-target state (because breaks e.g. prob1)
|
|
mdp.checkForDeadlocks(target);
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Optimise by enlarging target set (if more info is available)
|
|
targetOrig = target;
|
|
if (init != null && known != null) {
|
|
target = new BitSet(n);
|
|
for (i = 0; i < n; i++) {
|
|
target.set(i, targetOrig.get(i) || (known.get(i) && init[i] == 1.0));
|
|
}
|
|
}
|
|
|
|
// If required, create/initialise strategy storage
|
|
// Set choices to -1, denoting unknown
|
|
// (except for target states, which are -2, denoting arbitrary)
|
|
if (genStrat || exportAdv) {
|
|
strat = new int[n];
|
|
for (i = 0; i < n; i++) {
|
|
strat[i] = target.get(i) ? -2 : -1;
|
|
}
|
|
}
|
|
|
|
// Precomputation
|
|
timerProb0 = System.currentTimeMillis();
|
|
if (precomp && prob0) {
|
|
no = prob0(mdp, remain, target, min, strat);
|
|
} else {
|
|
no = new BitSet();
|
|
}
|
|
timerProb0 = System.currentTimeMillis() - timerProb0;
|
|
timerProb1 = System.currentTimeMillis();
|
|
if (precomp && prob1) {
|
|
yes = prob1(mdp, remain, target, min, strat);
|
|
} else {
|
|
yes = (BitSet) target.clone();
|
|
}
|
|
timerProb1 = System.currentTimeMillis() - timerProb1;
|
|
|
|
// Print results of precomputation
|
|
numYes = yes.cardinality();
|
|
numNo = no.cardinality();
|
|
mainLog.println("target=" + target.cardinality() + ", yes=" + numYes + ", no=" + numNo + ", maybe=" + (n - (numYes + numNo)));
|
|
|
|
// If still required, store strategy for no/yes (0/1) states.
|
|
// This is just for the cases max=0 and min=1, where arbitrary choices suffice (denoted by -2)
|
|
if (genStrat || exportAdv) {
|
|
if (min) {
|
|
for (i = yes.nextSetBit(0); i >= 0; i = yes.nextSetBit(i + 1)) {
|
|
if (!target.get(i))
|
|
strat[i] = -2;
|
|
}
|
|
} else {
|
|
for (i = no.nextSetBit(0); i >= 0; i = no.nextSetBit(i + 1)) {
|
|
strat[i] = -2;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Compute probabilities (if needed)
|
|
if (numYes + numNo < n) {
|
|
switch (mdpSolnMethod) {
|
|
case VALUE_ITERATION:
|
|
res = computeReachProbsValIter(mdp, no, yes, min, init, known, strat);
|
|
break;
|
|
case GAUSS_SEIDEL:
|
|
res = computeReachProbsGaussSeidel(mdp, no, yes, min, init, known, strat);
|
|
break;
|
|
case POLICY_ITERATION:
|
|
res = computeReachProbsPolIter(mdp, no, yes, min, strat);
|
|
break;
|
|
case MODIFIED_POLICY_ITERATION:
|
|
res = computeReachProbsModPolIter(mdp, no, yes, min, strat);
|
|
break;
|
|
default:
|
|
throw new PrismException("Unknown MDP solution method " + mdpSolnMethod.fullName());
|
|
}
|
|
} else {
|
|
res = new ModelCheckerResult();
|
|
res.soln = Utils.bitsetToDoubleArray(yes, n);
|
|
}
|
|
|
|
// Finished probabilistic reachability
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.println("Probabilistic reachability took " + timer / 1000.0 + " seconds.");
|
|
|
|
// Store strategy
|
|
if (genStrat) {
|
|
res.strat = new MDStrategyArray(mdp, strat);
|
|
}
|
|
// Export adversary
|
|
if (exportAdv) {
|
|
// Prune strategy
|
|
restrictStrategyToReachableStates(mdp, strat);
|
|
// Print strategy
|
|
mainLog.print("Strat:");
|
|
for (i = 0; i < n; i++) {
|
|
mainLog.print(" " + i + ":" + strat[i]);
|
|
}
|
|
mainLog.println();
|
|
// Export
|
|
PrismLog out = new PrismFileLog(exportAdvFilename);
|
|
new DTMCFromMDPMemorylessAdversary(mdp, strat).exportToPrismExplicitTra(out);
|
|
out.close();
|
|
}
|
|
|
|
// Update time taken
|
|
res.timeTaken = timer / 1000.0;
|
|
res.timeProb0 = timerProb0 / 1000.0;
|
|
res.timePre = (timerProb0 + timerProb1) / 1000.0;
|
|
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Prob0 precomputation algorithm.
|
|
* i.e. determine the states of an MDP which, with min/max probability 0,
|
|
* reach a state in {@code target}, while remaining in those in @{code remain}.
|
|
* {@code min}=true gives Prob0E, {@code min}=false gives Prob0A.
|
|
* Optionally, for min only, store optimal (memoryless) strategy info for 0 states.
|
|
* @param mdp The MDP
|
|
* @param remain Remain in these states (optional: null means "all")
|
|
* @param target Target states
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
|
|
*/
|
|
public BitSet prob0(MDP mdp, BitSet remain, BitSet target, boolean min, int strat[])
|
|
{
|
|
int n, iters;
|
|
BitSet u, soln, unknown;
|
|
boolean u_done;
|
|
long timer;
|
|
|
|
// Start precomputation
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("Starting Prob0 (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Special case: no target states
|
|
if (target.cardinality() == 0) {
|
|
soln = new BitSet(mdp.getNumStates());
|
|
soln.set(0, mdp.getNumStates());
|
|
return soln;
|
|
}
|
|
|
|
// Initialise vectors
|
|
n = mdp.getNumStates();
|
|
u = new BitSet(n);
|
|
soln = new BitSet(n);
|
|
|
|
// Determine set of states actually need to perform computation for
|
|
unknown = new BitSet();
|
|
unknown.set(0, n);
|
|
unknown.andNot(target);
|
|
if (remain != null)
|
|
unknown.and(remain);
|
|
|
|
// Fixed point loop
|
|
iters = 0;
|
|
u_done = false;
|
|
// Least fixed point - should start from 0 but we optimise by
|
|
// starting from 'target', thus bypassing first iteration
|
|
u.or(target);
|
|
soln.or(target);
|
|
while (!u_done) {
|
|
iters++;
|
|
// Single step of Prob0
|
|
mdp.prob0step(unknown, u, min, soln);
|
|
// Check termination
|
|
u_done = soln.equals(u);
|
|
// u = soln
|
|
u.clear();
|
|
u.or(soln);
|
|
}
|
|
|
|
// Negate
|
|
u.flip(0, n);
|
|
|
|
// Finished precomputation
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Prob0 (" + (min ? "min" : "max") + ")");
|
|
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
|
|
|
|
// If required, generate strategy. This is for min probs,
|
|
// so it can be done *after* the main prob0 algorithm (unlike for prob1).
|
|
// We simply pick, for all "no" states, the first choice for which all transitions stay in "no"
|
|
if (strat != null) {
|
|
for (int i = u.nextSetBit(0); i >= 0; i = u.nextSetBit(i + 1)) {
|
|
int numChoices = mdp.getNumChoices(i);
|
|
for (int k = 0; k < numChoices; k++) {
|
|
if (mdp.allSuccessorsInSet(i, k, u)) {
|
|
strat[i] = k;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return u;
|
|
}
|
|
|
|
/**
|
|
* Prob1 precomputation algorithm.
|
|
* i.e. determine the states of an MDP which, with min/max probability 1,
|
|
* reach a state in {@code target}, while remaining in those in @{code remain}.
|
|
* {@code min}=true gives Prob1A, {@code min}=false gives Prob1E.
|
|
* Optionally, for max only, store optimal (memoryless) strategy info for 1 states.
|
|
* @param mdp The MDP
|
|
* @param remain Remain in these states (optional: null means "all")
|
|
* @param target Target states
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
|
|
*/
|
|
public BitSet prob1(MDP mdp, BitSet remain, BitSet target, boolean min, int strat[])
|
|
{
|
|
int n, iters;
|
|
BitSet u, v, soln, unknown;
|
|
boolean u_done, v_done;
|
|
long timer;
|
|
|
|
// Start precomputation
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("Starting Prob1 (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Special case: no target states
|
|
if (target.cardinality() == 0) {
|
|
return new BitSet(mdp.getNumStates());
|
|
}
|
|
|
|
// Initialise vectors
|
|
n = mdp.getNumStates();
|
|
u = new BitSet(n);
|
|
v = new BitSet(n);
|
|
soln = new BitSet(n);
|
|
|
|
// Determine set of states actually need to perform computation for
|
|
unknown = new BitSet();
|
|
unknown.set(0, n);
|
|
unknown.andNot(target);
|
|
if (remain != null)
|
|
unknown.and(remain);
|
|
|
|
// Nested fixed point loop
|
|
iters = 0;
|
|
u_done = false;
|
|
// Greatest fixed point
|
|
u.set(0, n);
|
|
while (!u_done) {
|
|
v_done = false;
|
|
// Least fixed point - should start from 0 but we optimise by
|
|
// starting from 'target', thus bypassing first iteration
|
|
v.clear();
|
|
v.or(target);
|
|
soln.clear();
|
|
soln.or(target);
|
|
while (!v_done) {
|
|
iters++;
|
|
// Single step of Prob1
|
|
if (min)
|
|
mdp.prob1Astep(unknown, u, v, soln);
|
|
else
|
|
mdp.prob1Estep(unknown, u, v, soln, null);
|
|
// Check termination (inner)
|
|
v_done = soln.equals(v);
|
|
// v = soln
|
|
v.clear();
|
|
v.or(soln);
|
|
}
|
|
// Check termination (outer)
|
|
u_done = v.equals(u);
|
|
// u = v
|
|
u.clear();
|
|
u.or(v);
|
|
}
|
|
|
|
// If we need to generate a strategy, do another iteration of the inner loop for this
|
|
// We could do this during the main double fixed point above, but we would generate surplus
|
|
// strategy info for non-1 states during early iterations of the outer loop,
|
|
// which are not straightforward to remove since this method does not know which states
|
|
// already have valid strategy info from Prob0.
|
|
// Notice that we only need to look at states in u (since we already know the answer),
|
|
// so we restrict 'unknown' further
|
|
unknown.and(u);
|
|
if (!min && strat != null) {
|
|
v_done = false;
|
|
v.clear();
|
|
v.or(target);
|
|
soln.clear();
|
|
soln.or(target);
|
|
while (!v_done) {
|
|
mdp.prob1Estep(unknown, u, v, soln, strat);
|
|
v_done = soln.equals(v);
|
|
v.clear();
|
|
v.or(soln);
|
|
}
|
|
u_done = v.equals(u);
|
|
}
|
|
|
|
// Finished precomputation
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Prob1 (" + (min ? "min" : "max") + ")");
|
|
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
|
|
|
|
return u;
|
|
}
|
|
|
|
/**
|
|
* Compute reachability probabilities using value iteration.
|
|
* Optionally, store optimal (memoryless) strategy info.
|
|
* @param mdp The MDP
|
|
* @param no Probability 0 states
|
|
* @param yes Probability 1 states
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
* @param init Optionally, an initial solution vector (will be overwritten)
|
|
* @param known Optionally, a set of states for which the exact answer is known
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
|
|
* Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.
|
|
*/
|
|
protected ModelCheckerResult computeReachProbsValIter(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[])
|
|
throws PrismException
|
|
{
|
|
ModelCheckerResult res;
|
|
BitSet unknown;
|
|
int i, n, iters;
|
|
double soln[], soln2[], tmpsoln[], initVal;
|
|
boolean done;
|
|
long timer;
|
|
|
|
// Start value iteration
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("Starting value iteration (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create solution vector(s)
|
|
soln = new double[n];
|
|
soln2 = (init == null) ? new double[n] : init;
|
|
|
|
// Initialise solution vectors. Use (where available) the following in order of preference:
|
|
// (1) exact answer, if already known; (2) 1.0/0.0 if in yes/no; (3) passed in initial value; (4) initVal
|
|
// where initVal is 0.0 or 1.0, depending on whether we converge from below/above.
|
|
initVal = (valIterDir == ValIterDir.BELOW) ? 0.0 : 1.0;
|
|
if (init != null) {
|
|
if (known != null) {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
|
|
} else {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
|
|
}
|
|
} else {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : initVal;
|
|
}
|
|
|
|
// Determine set of states actually need to compute values for
|
|
unknown = new BitSet();
|
|
unknown.set(0, n);
|
|
unknown.andNot(yes);
|
|
unknown.andNot(no);
|
|
if (known != null)
|
|
unknown.andNot(known);
|
|
|
|
// Start iterations
|
|
iters = 0;
|
|
done = false;
|
|
while (!done && iters < maxIters) {
|
|
iters++;
|
|
// Matrix-vector multiply and min/max ops
|
|
mdp.mvMultMinMax(soln, min, soln2, unknown, false, strat);
|
|
// Check termination
|
|
done = PrismUtils.doublesAreClose(soln, soln2, termCritParam, termCrit == TermCrit.ABSOLUTE);
|
|
// Swap vectors for next iter
|
|
tmpsoln = soln;
|
|
soln = soln2;
|
|
soln2 = tmpsoln;
|
|
}
|
|
|
|
// Finished value iteration
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Value iteration (" + (min ? "min" : "max") + ")");
|
|
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
|
|
|
|
// Non-convergence is an error (usually)
|
|
if (!done && errorOnNonConverge) {
|
|
String msg = "Iterative method did not converge within " + iters + " iterations.";
|
|
msg += "\nConsider using a different numerical method or increasing the maximum number of iterations";
|
|
throw new PrismException(msg);
|
|
}
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln;
|
|
res.numIters = iters;
|
|
res.timeTaken = timer / 1000.0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Compute reachability probabilities using Gauss-Seidel (including Jacobi-style updates).
|
|
* @param mdp The MDP
|
|
* @param no Probability 0 states
|
|
* @param yes Probability 1 states
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
* @param init Optionally, an initial solution vector (will be overwritten)
|
|
* @param known Optionally, a set of states for which the exact answer is known
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
|
|
* Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.
|
|
*/
|
|
protected ModelCheckerResult computeReachProbsGaussSeidel(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[])
|
|
throws PrismException
|
|
{
|
|
ModelCheckerResult res;
|
|
BitSet unknown;
|
|
int i, n, iters;
|
|
double soln[], initVal, maxDiff;
|
|
boolean done;
|
|
long timer;
|
|
|
|
// Start value iteration
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("Starting Gauss-Seidel (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create solution vector
|
|
soln = (init == null) ? new double[n] : init;
|
|
|
|
// Initialise solution vector. Use (where available) the following in order of preference:
|
|
// (1) exact answer, if already known; (2) 1.0/0.0 if in yes/no; (3) passed in initial value; (4) initVal
|
|
// where initVal is 0.0 or 1.0, depending on whether we converge from below/above.
|
|
initVal = (valIterDir == ValIterDir.BELOW) ? 0.0 : 1.0;
|
|
if (init != null) {
|
|
if (known != null) {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
|
|
} else {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
|
|
}
|
|
} else {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : initVal;
|
|
}
|
|
|
|
// Determine set of states actually need to compute values for
|
|
unknown = new BitSet();
|
|
unknown.set(0, n);
|
|
unknown.andNot(yes);
|
|
unknown.andNot(no);
|
|
if (known != null)
|
|
unknown.andNot(known);
|
|
|
|
// Start iterations
|
|
iters = 0;
|
|
done = false;
|
|
while (!done && iters < maxIters) {
|
|
iters++;
|
|
// Matrix-vector multiply
|
|
maxDiff = mdp.mvMultGSMinMax(soln, min, unknown, false, termCrit == TermCrit.ABSOLUTE, strat);
|
|
// Check termination
|
|
done = maxDiff < termCritParam;
|
|
}
|
|
|
|
// Finished Gauss-Seidel
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Gauss-Seidel");
|
|
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
|
|
|
|
// Non-convergence is an error (usually)
|
|
if (!done && errorOnNonConverge) {
|
|
String msg = "Iterative method did not converge within " + iters + " iterations.";
|
|
msg += "\nConsider using a different numerical method or increasing the maximum number of iterations";
|
|
throw new PrismException(msg);
|
|
}
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln;
|
|
res.numIters = iters;
|
|
res.timeTaken = timer / 1000.0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Compute reachability probabilities using policy iteration.
|
|
* Optionally, store optimal (memoryless) strategy info.
|
|
* @param mdp: The MDP
|
|
* @param no: Probability 0 states
|
|
* @param yes: Probability 1 states
|
|
* @param min: Min or max probabilities (true=min, false=max)
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
|
|
*/
|
|
protected ModelCheckerResult computeReachProbsPolIter(MDP mdp, BitSet no, BitSet yes, boolean min, int strat[]) throws PrismException
|
|
{
|
|
ModelCheckerResult res;
|
|
int i, n, iters, totalIters;
|
|
double soln[], soln2[];
|
|
boolean done;
|
|
long timer;
|
|
DTMCModelChecker mcDTMC;
|
|
DTMC dtmc;
|
|
|
|
// Re-use solution to solve each new policy (strategy)?
|
|
boolean reUseSoln = true;
|
|
|
|
// Start policy iteration
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("Starting policy iteration (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Create a DTMC model checker (for solving policies)
|
|
mcDTMC = new DTMCModelChecker(this);
|
|
mcDTMC.inheritSettings(this);
|
|
mcDTMC.setLog(new PrismDevNullLog());
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create solution vectors
|
|
soln = new double[n];
|
|
soln2 = new double[n];
|
|
|
|
// Initialise solution vectors.
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0;
|
|
|
|
// If not passed in, create new storage for strategy and initialise
|
|
// Initial strategy just picks first choice (0) everywhere
|
|
if (strat == null) {
|
|
strat = new int[n];
|
|
for (i = 0; i < n; i++)
|
|
strat[i] = 0;
|
|
}
|
|
// Otherwise, just initialise for states not in yes/no
|
|
// (Optimal choices for yes/no should already be known)
|
|
else {
|
|
for (i = 0; i < n; i++)
|
|
if (!(no.get(i) || yes.get(i)))
|
|
strat[i] = 0;
|
|
}
|
|
|
|
// Start iterations
|
|
iters = totalIters = 0;
|
|
done = false;
|
|
while (!done) {
|
|
iters++;
|
|
// Solve induced DTMC for strategy
|
|
dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
|
|
res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, reUseSoln ? soln : null, null);
|
|
soln = res.soln;
|
|
totalIters += res.numIters;
|
|
// Check if optimal, improve non-optimal choices
|
|
mdp.mvMultMinMax(soln, min, soln2, null, false, null);
|
|
done = true;
|
|
for (i = 0; i < n; i++) {
|
|
// Don't look at no/yes states - we may not have strategy info for them,
|
|
// so they might appear non-optimal
|
|
if (no.get(i) || yes.get(i))
|
|
continue;
|
|
if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
|
|
done = false;
|
|
List<Integer> opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]);
|
|
// Only update strategy if strictly better
|
|
if (!opt.contains(strat[i]))
|
|
strat[i] = opt.get(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Finished policy iteration
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Policy iteration");
|
|
mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln;
|
|
res.numIters = totalIters;
|
|
res.timeTaken = timer / 1000.0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Compute reachability probabilities using modified policy iteration.
|
|
* @param mdp: The MDP
|
|
* @param no: Probability 0 states
|
|
* @param yes: Probability 1 states
|
|
* @param min: Min or max probabilities (true=min, false=max)
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
|
|
*/
|
|
protected ModelCheckerResult computeReachProbsModPolIter(MDP mdp, BitSet no, BitSet yes, boolean min, int strat[]) throws PrismException
|
|
{
|
|
ModelCheckerResult res;
|
|
int i, n, iters, totalIters;
|
|
double soln[], soln2[];
|
|
boolean done;
|
|
long timer;
|
|
DTMCModelChecker mcDTMC;
|
|
DTMC dtmc;
|
|
|
|
// Start value iteration
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("Starting modified policy iteration (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Create a DTMC model checker (for solving policies)
|
|
mcDTMC = new DTMCModelChecker(this);
|
|
mcDTMC.inheritSettings(this);
|
|
mcDTMC.setLog(new PrismDevNullLog());
|
|
|
|
// Limit iters for DTMC solution - this implements "modified" policy iteration
|
|
mcDTMC.setMaxIters(100);
|
|
mcDTMC.setErrorOnNonConverge(false);
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create solution vectors
|
|
soln = new double[n];
|
|
soln2 = new double[n];
|
|
|
|
// Initialise solution vectors.
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0;
|
|
|
|
// If not passed in, create new storage for strategy and initialise
|
|
// Initial strategy just picks first choice (0) everywhere
|
|
if (strat == null) {
|
|
strat = new int[n];
|
|
for (i = 0; i < n; i++)
|
|
strat[i] = 0;
|
|
}
|
|
// Otherwise, just initialise for states not in yes/no
|
|
// (Optimal choices for yes/no should already be known)
|
|
else {
|
|
for (i = 0; i < n; i++)
|
|
if (!(no.get(i) || yes.get(i)))
|
|
strat[i] = 0;
|
|
}
|
|
|
|
// Start iterations
|
|
iters = totalIters = 0;
|
|
done = false;
|
|
while (!done) {
|
|
iters++;
|
|
// Solve induced DTMC for strategy
|
|
dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
|
|
res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, soln, null);
|
|
soln = res.soln;
|
|
totalIters += res.numIters;
|
|
// Check if optimal, improve non-optimal choices
|
|
mdp.mvMultMinMax(soln, min, soln2, null, false, null);
|
|
done = true;
|
|
for (i = 0; i < n; i++) {
|
|
// Don't look at no/yes states - we don't store strategy info for them,
|
|
// so they might appear non-optimal
|
|
if (no.get(i) || yes.get(i))
|
|
continue;
|
|
if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
|
|
done = false;
|
|
List<Integer> opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]);
|
|
strat[i] = opt.get(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Finished policy iteration
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Modified policy iteration");
|
|
mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln;
|
|
res.numIters = totalIters;
|
|
res.timeTaken = timer / 1000.0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Construct strategy information for min/max reachability probabilities.
|
|
* (More precisely, list of indices of choices resulting in min/max.)
|
|
* (Note: indices are guaranteed to be sorted in ascending order.)
|
|
* @param mdp The MDP
|
|
* @param state The state to generate strategy info for
|
|
* @param target The set of target states to reach
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
* @param lastSoln Vector of values from which to recompute in one iteration
|
|
*/
|
|
public List<Integer> probReachStrategy(MDP mdp, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException
|
|
{
|
|
double val = mdp.mvMultMinMaxSingle(state, lastSoln, min, null);
|
|
return mdp.mvMultMinMaxSingleChoices(state, lastSoln, min, val);
|
|
}
|
|
|
|
/**
|
|
* Compute bounded reachability probabilities.
|
|
* i.e. compute the min/max probability of reaching a state in {@code target} within k steps.
|
|
* @param mdp The MDP
|
|
* @param target Target states
|
|
* @param k Bound
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
*/
|
|
public ModelCheckerResult computeBoundedReachProbs(MDP mdp, BitSet target, int k, boolean min) throws PrismException
|
|
{
|
|
return computeBoundedReachProbs(mdp, null, target, k, min, null, null);
|
|
}
|
|
|
|
/**
|
|
* Compute bounded until probabilities.
|
|
* i.e. compute the min/max probability of reaching a state in {@code target},
|
|
* within k steps, and while remaining in states in @{code remain}.
|
|
* @param mdp The MDP
|
|
* @param remain Remain in these states (optional: null means "all")
|
|
* @param target Target states
|
|
* @param k Bound
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
*/
|
|
public ModelCheckerResult computeBoundedUntilProbs(MDP mdp, BitSet remain, BitSet target, int k, boolean min) throws PrismException
|
|
{
|
|
return computeBoundedReachProbs(mdp, remain, target, k, min, null, null);
|
|
}
|
|
|
|
/**
|
|
* Compute bounded reachability/until probabilities.
|
|
* i.e. compute the min/max probability of reaching a state in {@code target},
|
|
* within k steps, and while remaining in states in @{code remain}.
|
|
* @param mdp The MDP
|
|
* @param remain Remain in these states (optional: null means "all")
|
|
* @param target Target states
|
|
* @param k Bound
|
|
* @param min Min or max probabilities (true=min, false=max)
|
|
* @param init Optionally, an initial solution vector (may be overwritten)
|
|
* @param results Optional array of size k+1 to store (init state) results for each step (null if unused)
|
|
*/
|
|
public ModelCheckerResult computeBoundedReachProbs(MDP mdp, BitSet remain, BitSet target, int k, boolean min, double init[], double results[])
|
|
throws PrismException
|
|
{
|
|
ModelCheckerResult res = null;
|
|
BitSet unknown;
|
|
int i, n, iters;
|
|
double soln[], soln2[], tmpsoln[];
|
|
long timer;
|
|
|
|
// Start bounded probabilistic reachability
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("\nStarting bounded probabilistic reachability (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create solution vector(s)
|
|
soln = new double[n];
|
|
soln2 = (init == null) ? new double[n] : init;
|
|
|
|
// Initialise solution vectors. Use passed in initial vector, if present
|
|
if (init != null) {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = target.get(i) ? 1.0 : init[i];
|
|
} else {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = target.get(i) ? 1.0 : 0.0;
|
|
}
|
|
// Store intermediate results if required
|
|
// (compute min/max value over initial states for first step)
|
|
if (results != null) {
|
|
// TODO: whether this is min or max should be specified somehow
|
|
results[0] = Utils.minMaxOverArraySubset(soln2, mdp.getInitialStates(), true);
|
|
}
|
|
|
|
// Determine set of states actually need to perform computation for
|
|
unknown = new BitSet();
|
|
unknown.set(0, n);
|
|
unknown.andNot(target);
|
|
if (remain != null)
|
|
unknown.and(remain);
|
|
|
|
// Start iterations
|
|
iters = 0;
|
|
while (iters < k) {
|
|
iters++;
|
|
// Matrix-vector multiply and min/max ops
|
|
mdp.mvMultMinMax(soln, min, soln2, unknown, false, null);
|
|
// Store intermediate results if required
|
|
// (compute min/max value over initial states for this step)
|
|
if (results != null) {
|
|
// TODO: whether this is min or max should be specified somehow
|
|
results[iters] = Utils.minMaxOverArraySubset(soln2, mdp.getInitialStates(), true);
|
|
}
|
|
// Swap vectors for next iter
|
|
tmpsoln = soln;
|
|
soln = soln2;
|
|
soln2 = tmpsoln;
|
|
}
|
|
|
|
// Finished bounded probabilistic reachability
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Bounded probabilistic reachability (" + (min ? "min" : "max") + ")");
|
|
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln;
|
|
res.lastSoln = soln2;
|
|
res.numIters = iters;
|
|
res.timeTaken = timer / 1000.0;
|
|
res.timePre = 0.0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Compute expected cumulative (step-bounded) rewards.
|
|
* i.e. compute the min/max reward accumulated within {@code k} steps.
|
|
* @param mdp The MDP
|
|
* @param mdpRewards The rewards
|
|
* @param target Target states
|
|
* @param min Min or max rewards (true=min, false=max)
|
|
*/
|
|
public ModelCheckerResult computeCumulativeRewards(MDP mdp, MDPRewards mdpRewards, int k, boolean min) throws PrismException
|
|
{
|
|
ModelCheckerResult res = null;
|
|
int i, n, iters;
|
|
long timer;
|
|
double soln[], soln2[], tmpsoln[];
|
|
|
|
// Start expected cumulative reward
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("\nStarting expected cumulative reward (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create/initialise solution vector(s)
|
|
soln = new double[n];
|
|
soln2 = new double[n];
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = 0.0;
|
|
|
|
// Start iterations
|
|
iters = 0;
|
|
while (iters < k) {
|
|
iters++;
|
|
// Matrix-vector multiply and min/max ops
|
|
int strat[] = new int[n];
|
|
mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, strat);
|
|
mainLog.println(strat);
|
|
// Swap vectors for next iter
|
|
tmpsoln = soln;
|
|
soln = soln2;
|
|
soln2 = tmpsoln;
|
|
}
|
|
|
|
// Finished value iteration
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Expected cumulative reward (" + (min ? "min" : "max") + ")");
|
|
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln;
|
|
res.numIters = iters;
|
|
res.timeTaken = timer / 1000.0;
|
|
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Compute expected reachability rewards.
|
|
* @param mdp The MDP
|
|
* @param mdpRewards The rewards
|
|
* @param target Target states
|
|
* @param min Min or max rewards (true=min, false=max)
|
|
*/
|
|
public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min) throws PrismException
|
|
{
|
|
return computeReachRewards(mdp, mdpRewards, target, min, null, null);
|
|
}
|
|
|
|
/**
|
|
* Compute expected reachability rewards.
|
|
* i.e. compute the min/max reward accumulated to reach a state in {@code target}.
|
|
* @param mdp The MDP
|
|
* @param mdpRewards The rewards
|
|
* @param target Target states
|
|
* @param min Min or max rewards (true=min, false=max)
|
|
* @param init Optionally, an initial solution vector (may be overwritten)
|
|
* @param known Optionally, a set of states for which the exact answer is known
|
|
* Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values).
|
|
* Also, 'known' values cannot be passed for some solution methods, e.g. policy iteration.
|
|
*/
|
|
public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min, double init[], BitSet known)
|
|
throws PrismException
|
|
{
|
|
ModelCheckerResult res = null;
|
|
BitSet inf;
|
|
int i, n, numTarget, numInf;
|
|
long timer, timerProb1;
|
|
int strat[] = null;
|
|
// Local copy of setting
|
|
MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;
|
|
|
|
// Switch to a supported method, if necessary
|
|
if (!(mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION || mdpSolnMethod == MDPSolnMethod.GAUSS_SEIDEL || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION)) {
|
|
mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL;
|
|
mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\"");
|
|
}
|
|
|
|
// Check for some unsupported combinations
|
|
if (mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) {
|
|
if (known != null) {
|
|
throw new PrismException("Policy iteration methods cannot be passed 'known' values for some states");
|
|
}
|
|
}
|
|
|
|
// Start expected reachability
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("\nStarting expected reachability (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Check for deadlocks in non-target state (because breaks e.g. prob1)
|
|
mdp.checkForDeadlocks(target);
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Optimise by enlarging target set (if more info is available)
|
|
if (init != null && known != null) {
|
|
BitSet targetNew = new BitSet(n);
|
|
for (i = 0; i < n; i++) {
|
|
targetNew.set(i, target.get(i) || (known.get(i) && init[i] == 0.0));
|
|
}
|
|
target = targetNew;
|
|
}
|
|
|
|
// If required, create/initialise strategy storage
|
|
// Set choices to -1, denoting unknown
|
|
// (except for target states, which are -2, denoting arbitrary)
|
|
if (genStrat || exportAdv || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) {
|
|
strat = new int[n];
|
|
for (i = 0; i < n; i++) {
|
|
strat[i] = target.get(i) ? -2 : -1;
|
|
}
|
|
}
|
|
|
|
// Precomputation (not optional)
|
|
timerProb1 = System.currentTimeMillis();
|
|
inf = prob1(mdp, null, target, !min, strat);
|
|
inf.flip(0, n);
|
|
timerProb1 = System.currentTimeMillis() - timerProb1;
|
|
|
|
// Print results of precomputation
|
|
numTarget = target.cardinality();
|
|
numInf = inf.cardinality();
|
|
mainLog.println("target=" + numTarget + ", inf=" + numInf + ", rest=" + (n - (numTarget + numInf)));
|
|
|
|
// If required, generate strategy for "inf" states.
|
|
if (genStrat || exportAdv || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) {
|
|
if (min) {
|
|
// If min reward is infinite, all choices give infinity
|
|
// So the choice can be arbitrary, denoted by -2;
|
|
for (i = inf.nextSetBit(0); i >= 0; i = inf.nextSetBit(i + 1)) {
|
|
strat[i] = -2;
|
|
}
|
|
} else {
|
|
// If max reward is infinite, there is at least one choice giving infinity.
|
|
// So we pick, for all "inf" states, the first choice for which some transitions stays in "inf".
|
|
for (i = inf.nextSetBit(0); i >= 0; i = inf.nextSetBit(i + 1)) {
|
|
int numChoices = mdp.getNumChoices(i);
|
|
for (int k = 0; k < numChoices; k++) {
|
|
if (mdp.allSuccessorsInSet(i, k, inf)) {
|
|
strat[i] = k;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Compute rewards
|
|
switch (mdpSolnMethod) {
|
|
case VALUE_ITERATION:
|
|
res = computeReachRewardsValIter(mdp, mdpRewards, target, inf, min, init, known, strat);
|
|
break;
|
|
case GAUSS_SEIDEL:
|
|
res = computeReachRewardsGaussSeidel(mdp, mdpRewards, target, inf, min, init, known, strat);
|
|
break;
|
|
case POLICY_ITERATION:
|
|
res = computeReachRewardsPolIter(mdp, mdpRewards, target, inf, min, strat);
|
|
break;
|
|
default:
|
|
throw new PrismException("Unknown MDP solution method " + mdpSolnMethod.fullName());
|
|
}
|
|
|
|
// Store strategy
|
|
if (genStrat) {
|
|
res.strat = new MDStrategyArray(mdp, strat);
|
|
}
|
|
// Export adversary
|
|
if (exportAdv) {
|
|
// Prune strategy
|
|
restrictStrategyToReachableStates(mdp, strat);
|
|
// Print strategy
|
|
mainLog.print("Strat:");
|
|
for (i = 0; i < n; i++) {
|
|
mainLog.print(" " + i + ":" + strat[i]);
|
|
}
|
|
mainLog.println();
|
|
// Export
|
|
PrismLog out = new PrismFileLog(exportAdvFilename);
|
|
new DTMCFromMDPMemorylessAdversary(mdp, strat).exportToPrismExplicitTra(out);
|
|
out.close();
|
|
}
|
|
|
|
// Finished expected reachability
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.println("Expected reachability took " + timer / 1000.0 + " seconds.");
|
|
|
|
// Update time taken
|
|
res.timeTaken = timer / 1000.0;
|
|
res.timePre = timerProb1 / 1000.0;
|
|
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Compute expected reachability rewards using value iteration.
|
|
* Optionally, store optimal (memoryless) strategy info.
|
|
* @param mdp The MDP
|
|
* @param mdpRewards The rewards
|
|
* @param target Target states
|
|
* @param inf States for which reward is infinite
|
|
* @param min Min or max rewards (true=min, false=max)
|
|
* @param init Optionally, an initial solution vector (will be overwritten)
|
|
* @param known Optionally, a set of states for which the exact answer is known
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
|
|
* Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.
|
|
*/
|
|
protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[], BitSet known, int strat[])
|
|
throws PrismException
|
|
{
|
|
ModelCheckerResult res;
|
|
BitSet unknown;
|
|
int i, n, iters;
|
|
double soln[], soln2[], tmpsoln[];
|
|
boolean done;
|
|
long timer;
|
|
|
|
// Start value iteration
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("Starting value iteration (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create solution vector(s)
|
|
soln = new double[n];
|
|
soln2 = (init == null) ? new double[n] : init;
|
|
|
|
// Initialise solution vectors. Use (where available) the following in order of preference:
|
|
// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) passed in initial value; (4) 0.0
|
|
if (init != null) {
|
|
if (known != null) {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
|
|
} else {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
|
|
}
|
|
} else {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;
|
|
}
|
|
|
|
// Determine set of states actually need to compute values for
|
|
unknown = new BitSet();
|
|
unknown.set(0, n);
|
|
unknown.andNot(target);
|
|
unknown.andNot(inf);
|
|
if (known != null)
|
|
unknown.andNot(known);
|
|
|
|
// Start iterations
|
|
iters = 0;
|
|
done = false;
|
|
while (!done && iters < maxIters) {
|
|
//mainLog.println(soln);
|
|
iters++;
|
|
// Matrix-vector multiply and min/max ops
|
|
mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, unknown, false, strat);
|
|
// Check termination
|
|
done = PrismUtils.doublesAreClose(soln, soln2, termCritParam, termCrit == TermCrit.ABSOLUTE);
|
|
// Swap vectors for next iter
|
|
tmpsoln = soln;
|
|
soln = soln2;
|
|
soln2 = tmpsoln;
|
|
}
|
|
|
|
// Finished value iteration
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Value iteration (" + (min ? "min" : "max") + ")");
|
|
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
|
|
|
|
// Non-convergence is an error (usually)
|
|
if (!done && errorOnNonConverge) {
|
|
String msg = "Iterative method did not converge within " + iters + " iterations.";
|
|
msg += "\nConsider using a different numerical method or increasing the maximum number of iterations";
|
|
throw new PrismException(msg);
|
|
}
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln;
|
|
res.numIters = iters;
|
|
res.timeTaken = timer / 1000.0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Compute expected reachability rewards using Gauss-Seidel (including Jacobi-style updates).
|
|
* Optionally, store optimal (memoryless) strategy info.
|
|
* @param mdp The MDP
|
|
* @param mdpRewards The rewards
|
|
* @param target Target states
|
|
* @param inf States for which reward is infinite
|
|
* @param min Min or max rewards (true=min, false=max)
|
|
* @param init Optionally, an initial solution vector (will be overwritten)
|
|
* @param known Optionally, a set of states for which the exact answer is known
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
|
|
* Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.
|
|
*/
|
|
protected ModelCheckerResult computeReachRewardsGaussSeidel(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[],
|
|
BitSet known, int strat[]) throws PrismException
|
|
{
|
|
ModelCheckerResult res;
|
|
BitSet unknown;
|
|
int i, n, iters;
|
|
double soln[], maxDiff;
|
|
boolean done;
|
|
long timer;
|
|
|
|
// Start value iteration
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("Starting Gauss-Seidel (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create solution vector(s)
|
|
soln = (init == null) ? new double[n] : init;
|
|
|
|
// Initialise solution vector. Use (where available) the following in order of preference:
|
|
// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) passed in initial value; (4) 0.0
|
|
if (init != null) {
|
|
if (known != null) {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
|
|
} else {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
|
|
}
|
|
} else {
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;
|
|
}
|
|
|
|
// Determine set of states actually need to compute values for
|
|
unknown = new BitSet();
|
|
unknown.set(0, n);
|
|
unknown.andNot(target);
|
|
unknown.andNot(inf);
|
|
if (known != null)
|
|
unknown.andNot(known);
|
|
|
|
// Start iterations
|
|
iters = 0;
|
|
done = false;
|
|
while (!done && iters < maxIters) {
|
|
//mainLog.println(soln);
|
|
iters++;
|
|
// Matrix-vector multiply and min/max ops
|
|
maxDiff = mdp.mvMultRewGSMinMax(soln, mdpRewards, min, unknown, false, termCrit == TermCrit.ABSOLUTE, strat);
|
|
// Check termination
|
|
done = maxDiff < termCritParam;
|
|
}
|
|
|
|
// Finished Gauss-Seidel
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Gauss-Seidel (" + (min ? "min" : "max") + ")");
|
|
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
|
|
|
|
// Non-convergence is an error (usually)
|
|
if (!done && errorOnNonConverge) {
|
|
String msg = "Iterative method did not converge within " + iters + " iterations.";
|
|
msg += "\nConsider using a different numerical method or increasing the maximum number of iterations";
|
|
throw new PrismException(msg);
|
|
}
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln;
|
|
res.numIters = iters;
|
|
res.timeTaken = timer / 1000.0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Compute expected reachability rewards using policy iteration.
|
|
* The array {@code strat} is used both to pass in the initial strategy for policy iteration,
|
|
* and as storage for the resulting optimal strategy (if needed).
|
|
* Passing in an initial strategy is required when some states have infinite reward,
|
|
* to avoid the possibility of policy iteration getting stuck on an infinite-value strategy.
|
|
* @param mdp The MDP
|
|
* @param mdpRewards The rewards
|
|
* @param target Target states
|
|
* @param inf States for which reward is infinite
|
|
* @param min Min or max rewards (true=min, false=max)
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
|
|
*/
|
|
protected ModelCheckerResult computeReachRewardsPolIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, int strat[])
|
|
throws PrismException
|
|
{
|
|
ModelCheckerResult res;
|
|
int i, n, iters, totalIters;
|
|
double soln[], soln2[];
|
|
boolean done;
|
|
long timer;
|
|
DTMCModelChecker mcDTMC;
|
|
DTMC dtmc;
|
|
MCRewards mcRewards;
|
|
|
|
// Re-use solution to solve each new policy (strategy)?
|
|
boolean reUseSoln = true;
|
|
|
|
// Start policy iteration
|
|
timer = System.currentTimeMillis();
|
|
mainLog.println("Starting policy iteration (" + (min ? "min" : "max") + ")...");
|
|
|
|
// Create a DTMC model checker (for solving policies)
|
|
mcDTMC = new DTMCModelChecker(this);
|
|
mcDTMC.inheritSettings(this);
|
|
mcDTMC.setLog(new PrismDevNullLog());
|
|
|
|
// Store num states
|
|
n = mdp.getNumStates();
|
|
|
|
// Create solution vector(s)
|
|
soln = new double[n];
|
|
soln2 = new double[n];
|
|
|
|
// Initialise solution vectors.
|
|
for (i = 0; i < n; i++)
|
|
soln[i] = soln2[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;
|
|
|
|
// If not passed in, create new storage for strategy and initialise
|
|
// Initial strategy just picks first choice (0) everywhere
|
|
if (strat == null) {
|
|
strat = new int[n];
|
|
for (i = 0; i < n; i++)
|
|
strat[i] = 0;
|
|
}
|
|
|
|
// Start iterations
|
|
iters = totalIters = 0;
|
|
done = false;
|
|
while (!done && iters < maxIters) {
|
|
iters++;
|
|
// Solve induced DTMC for strategy
|
|
dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
|
|
mcRewards = new MCRewardsFromMDPRewards(mdpRewards, strat);
|
|
res = mcDTMC.computeReachRewardsValIter(dtmc, mcRewards, target, inf, reUseSoln ? soln : null, null);
|
|
soln = res.soln;
|
|
totalIters += res.numIters;
|
|
// Check if optimal, improve non-optimal choices
|
|
mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, null);
|
|
done = true;
|
|
for (i = 0; i < n; i++) {
|
|
// Don't look at target/inf states - we may not have strategy info for them,
|
|
// so they might appear non-optimal
|
|
if (target.get(i) || inf.get(i))
|
|
continue;
|
|
if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
|
|
done = false;
|
|
List<Integer> opt = mdp.mvMultRewMinMaxSingleChoices(i, soln, mdpRewards, min, soln2[i]);
|
|
// Only update strategy if strictly better
|
|
if (!opt.contains(strat[i]))
|
|
strat[i] = opt.get(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Finished policy iteration
|
|
timer = System.currentTimeMillis() - timer;
|
|
mainLog.print("Policy iteration");
|
|
mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");
|
|
|
|
// Return results
|
|
res = new ModelCheckerResult();
|
|
res.soln = soln;
|
|
res.numIters = totalIters;
|
|
res.timeTaken = timer / 1000.0;
|
|
return res;
|
|
}
|
|
|
|
/**
|
|
* Construct strategy information for min/max expected reachability.
|
|
* (More precisely, list of indices of choices resulting in min/max.)
|
|
* (Note: indices are guaranteed to be sorted in ascending order.)
|
|
* @param mdp The MDP
|
|
* @param mdpRewards The rewards
|
|
* @param state The state to generate strategy info for
|
|
* @param target The set of target states to reach
|
|
* @param min Min or max rewards (true=min, false=max)
|
|
* @param lastSoln Vector of values from which to recompute in one iteration
|
|
*/
|
|
public List<Integer> expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException
|
|
{
|
|
double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, mdpRewards, min, null);
|
|
return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val);
|
|
}
|
|
|
|
/**
|
|
* Restrict a (memoryless) strategy for an MDP, stored as an integer array of choice indices,
|
|
* to the states of the MDP that are reachable under that strategy.
|
|
* @param mdp The MDP
|
|
* @param strat The strategy
|
|
*/
|
|
public void restrictStrategyToReachableStates(MDP mdp, int strat[])
|
|
{
|
|
BitSet restrict = new BitSet();
|
|
BitSet explore = new BitSet();
|
|
// Get initial states
|
|
for (int is : mdp.getInitialStates()) {
|
|
restrict.set(is);
|
|
explore.set(is);
|
|
}
|
|
// Compute reachable states (store in 'restrict')
|
|
boolean foundMore = true;
|
|
while (foundMore) {
|
|
foundMore = false;
|
|
for (int s = explore.nextSetBit(0); s >= 0; s = explore.nextSetBit(s + 1)) {
|
|
explore.set(s, false);
|
|
if (strat[s] >= 0) {
|
|
Iterator<Map.Entry<Integer, Double>> iter = mdp.getTransitionsIterator(s, strat[s]);
|
|
while (iter.hasNext()) {
|
|
Map.Entry<Integer, Double> e = iter.next();
|
|
int dest = e.getKey();
|
|
if (!restrict.get(dest)) {
|
|
foundMore = true;
|
|
restrict.set(dest);
|
|
explore.set(dest);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Set strategy choice for non-reachable state to -1
|
|
int n = mdp.getNumStates();
|
|
for (int s = restrict.nextClearBit(0); s < n; s = restrict.nextClearBit(s + 1)) {
|
|
strat[s] = -3;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Simple test program.
|
|
*/
|
|
public static void main(String args[])
|
|
{
|
|
MDPModelChecker mc;
|
|
MDPSimple mdp;
|
|
ModelCheckerResult res;
|
|
BitSet init, target;
|
|
Map<String, BitSet> labels;
|
|
boolean min = true;
|
|
try {
|
|
mc = new MDPModelChecker(null);
|
|
mdp = new MDPSimple();
|
|
mdp.buildFromPrismExplicit(args[0]);
|
|
//System.out.println(mdp);
|
|
labels = mc.loadLabelsFile(args[1]);
|
|
//System.out.println(labels);
|
|
init = labels.get("init");
|
|
target = labels.get(args[2]);
|
|
if (target == null)
|
|
throw new PrismException("Unknown label \"" + args[2] + "\"");
|
|
for (int i = 3; i < args.length; i++) {
|
|
if (args[i].equals("-min"))
|
|
min = true;
|
|
else if (args[i].equals("-max"))
|
|
min = false;
|
|
else if (args[i].equals("-nopre"))
|
|
mc.setPrecomp(false);
|
|
}
|
|
res = mc.computeReachProbs(mdp, target, min);
|
|
System.out.println(res.soln[init.nextSetBit(0)]);
|
|
} catch (PrismException e) {
|
|
System.out.println(e);
|
|
}
|
|
}
|
|
}
|