//============================================================================== // // Copyright (c) 2002- // Authors: // * Dave Parker (University of Oxford) // //------------------------------------------------------------------------------ // // This file is part of PRISM. // // PRISM is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // PRISM is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with PRISM; if not, write to the Free Software Foundation, // Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // //============================================================================== package explicit; import java.util.BitSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Vector; import parser.ast.Expression; import prism.DRA; import prism.Pair; import prism.PrismComponent; import prism.PrismDevNullLog; import prism.PrismException; import prism.PrismFileLog; import prism.PrismLog; import prism.PrismUtils; import strat.MDStrategyArray; import explicit.rewards.MCRewards; import explicit.rewards.MCRewardsFromMDPRewards; import explicit.rewards.MDPRewards; /** * Explicit-state model checker for Markov decision processes (MDPs). */ public class MDPModelChecker extends ProbModelChecker { /** * Create a new MDPModelChecker, inherit basic state from parent (unless null). 
*/ public MDPModelChecker(PrismComponent parent) throws PrismException { super(parent); } // Model checking functions @Override protected StateValues checkProbPathFormulaLTL(Model model, Expression expr, boolean qual, MinMax minMax, BitSet statesOfInterest) throws PrismException { LTLModelChecker mcLtl; StateValues probsProduct, probs; Expression ltl; DRA dra; NondetModel modelProduct; MDPModelChecker mcProduct; long time; // Can't do LTL with time-bounded variants of the temporal operators if (Expression.containsTemporalTimeBounds(expr)) { throw new PrismException("Time-bounded operators not supported in LTL: " + expr); } // For LTL model checking routines mcLtl = new LTLModelChecker(this); // Model check maximal state formulas Vector labelBS = new Vector(); ltl = mcLtl.checkMaximalStateFormulas(this, model, expr.deepCopy(), labelBS); // Convert LTL formula to deterministic Rabin automaton (DRA) // For min probabilities, need to negate the formula // (add parentheses to allow re-parsing if required) if (minMax.isMin()) { ltl = Expression.Not(Expression.Parenth(ltl)); } mainLog.println("\nBuilding deterministic Rabin automaton (for " + ltl + ")..."); time = System.currentTimeMillis(); dra = LTLModelChecker.convertLTLFormulaToDRA(ltl); int draSize = dra.size(); mainLog.println("DRA has " + dra.size() + " states, " + dra.getNumAcceptancePairs() + " pairs."); time = System.currentTimeMillis() - time; mainLog.println("Time for Rabin translation: " + time / 1000.0 + " seconds."); // If required, export DRA if (settings.getExportPropAut()) { mainLog.println("Exporting DRA to file \"" + settings.getExportPropAutFilename() + "\"..."); PrismLog out = new PrismFileLog(settings.getExportPropAutFilename()); out.println(dra); out.close(); //dra.printDot(new java.io.PrintStream("dra.dot")); } // Build product of MDP and automaton mainLog.println("\nConstructing MDP-DRA product..."); Pair pair = mcLtl.constructProductMDP(dra, (MDP) model, labelBS, statesOfInterest); modelProduct 
= pair.first; int invMap[] = pair.second; // Find accepting MECs + compute reachability probabilities mainLog.println("\nFinding accepting MECs..."); BitSet acceptingMECs = mcLtl.findAcceptingECStatesForRabin(dra, modelProduct, invMap); mainLog.println("\nComputing reachability probabilities..."); mcProduct = new MDPModelChecker(this); mcProduct.inheritSettings(this); probsProduct = StateValues.createFromDoubleArray(mcProduct.computeReachProbs((MDP) modelProduct, acceptingMECs, false).soln, modelProduct); // Subtract from 1 if we're model checking a negated formula for regular Pmin if (minMax.isMin()) { probsProduct.timesConstant(-1.0); probsProduct.plusConstant(1.0); } // Mapping probabilities in the original model double[] probsProductDbl = probsProduct.getDoubleArray(); double[] probsDbl = new double[model.getNumStates()]; // Get the probabilities for the original model by taking the initial states // of the product and projecting back to the states of the original model for (int i : modelProduct.getInitialStates()) { int s = invMap[i] / draSize; probsDbl[s] = probsProductDbl[i]; } probs = StateValues.createFromDoubleArray(probsDbl, model); probsProduct.clear(); return probs; } // Numerical computation functions /** * Compute next=state probabilities. * i.e. compute the probability of being in a state in {@code target} in the next step. 
* @param mdp The MDP * @param target Target states * @param min Min or max probabilities (true=min, false=max) */ public ModelCheckerResult computeNextProbs(MDP mdp, BitSet target, boolean min) throws PrismException { ModelCheckerResult res = null; int n; double soln[], soln2[]; long timer; timer = System.currentTimeMillis(); // Store num states n = mdp.getNumStates(); // Create/initialise solution vector(s) soln = Utils.bitsetToDoubleArray(target, n); soln2 = new double[n]; // Next-step probabilities mdp.mvMultMinMax(soln, min, soln2, null, false, null); // Return results res = new ModelCheckerResult(); res.soln = soln2; res.numIters = 1; res.timeTaken = timer / 1000.0; return res; } /** * Given a value vector x, compute the probability: * v(s) = min/max sched [ Sum_s' P_sched(s,s')*x(s') ] for s labeled with a, * v(s) = 0 for s not labeled with a. * * Clears the StateValues object x. * * @param tr the transition matrix * @param a the set of states labeled with a * @param x the value vector * @param min compute min instead of max */ public double[] computeRestrictedNext(MDP mdp, BitSet a, double[] x, boolean min) { int n; double soln[]; // Store num states n = mdp.getNumStates(); // initialized to 0.0 soln = new double[n]; // Next-step probabilities multiplication // restricted to a states mdp.mvMultMinMax(x, min, soln, a, false, null); return soln; } /** * Compute reachability probabilities. * i.e. compute the min/max probability of reaching a state in {@code target}. * @param mdp The MDP * @param target Target states * @param min Min or max probabilities (true=min, false=max) */ public ModelCheckerResult computeReachProbs(MDP mdp, BitSet target, boolean min) throws PrismException { return computeReachProbs(mdp, null, target, min, null, null); } /** * Compute until probabilities. * i.e. compute the min/max probability of reaching a state in {@code target}, * while remaining in those in @{code remain}. 
* @param mdp The MDP * @param remain Remain in these states (optional: null means "all") * @param target Target states * @param min Min or max probabilities (true=min, false=max) */ public ModelCheckerResult computeUntilProbs(MDP mdp, BitSet remain, BitSet target, boolean min) throws PrismException { return computeReachProbs(mdp, remain, target, min, null, null); } /** * Compute reachability/until probabilities. * i.e. compute the min/max probability of reaching a state in {@code target}, * while remaining in those in @{code remain}. * @param mdp The MDP * @param remain Remain in these states (optional: null means "all") * @param target Target states * @param min Min or max probabilities (true=min, false=max) * @param init Optionally, an initial solution vector (may be overwritten) * @param known Optionally, a set of states for which the exact answer is known * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values). * Also, 'known' values cannot be passed for some solution methods, e.g. policy iteration. 
 */
public ModelCheckerResult computeReachProbs(MDP mdp, BitSet remain, BitSet target, boolean min, double init[], BitSet known) throws PrismException
{
	ModelCheckerResult res = null;
	BitSet targetOrig, no, yes;
	int i, n, numYes, numNo;
	long timer, timerProb0, timerProb1;
	int strat[] = null;
	// Local copy of setting
	MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;

	// Switch to a supported method, if necessary
	if (mdpSolnMethod == MDPSolnMethod.LINEAR_PROGRAMMING) {
		mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL;
		mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\"");
	}

	// Check for some unsupported combinations
	if (mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION && valIterDir == ValIterDir.ABOVE) {
		if (!(precomp && prob0))
			throw new PrismException("Precomputation (Prob0) must be enabled for value iteration from above");
		if (!min)
			throw new PrismException("Value iteration from above only works for minimum probabilities");
	}
	if (mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION || mdpSolnMethod == MDPSolnMethod.MODIFIED_POLICY_ITERATION) {
		if (known != null) {
			throw new PrismException("Policy iteration methods cannot be passed 'known' values for some states");
		}
	}

	// Start probabilistic reachability
	timer = System.currentTimeMillis();
	mainLog.println("\nStarting probabilistic reachability (" + (min ? "min" : "max") + ")...");

	// Check for deadlocks in non-target state (because breaks e.g. prob1)
	mdp.checkForDeadlocks(target);

	// Store num states
	n = mdp.getNumStates();

	// Optimise by enlarging target set (if more info is available)
	// (any state already known to have value 1.0 can be treated as a target)
	targetOrig = target;
	if (init != null && known != null) {
		target = new BitSet(n);
		for (i = 0; i < n; i++) {
			target.set(i, targetOrig.get(i) || (known.get(i) && init[i] == 1.0));
		}
	}

	// If required, create/initialise strategy storage
	// Set choices to -1, denoting unknown
	// (except for target states, which are -2, denoting arbitrary)
	if (genStrat || exportAdv) {
		strat = new int[n];
		for (i = 0; i < n; i++) {
			strat[i] = target.get(i) ? -2 : -1;
		}
	}

	// Precomputation: 'no' = states with probability 0, 'yes' = states with probability 1
	timerProb0 = System.currentTimeMillis();
	if (precomp && prob0) {
		no = prob0(mdp, remain, target, min, strat);
	} else {
		no = new BitSet();
	}
	timerProb0 = System.currentTimeMillis() - timerProb0;
	timerProb1 = System.currentTimeMillis();
	if (precomp && prob1) {
		yes = prob1(mdp, remain, target, min, strat);
	} else {
		yes = (BitSet) target.clone();
	}
	timerProb1 = System.currentTimeMillis() - timerProb1;

	// Print results of precomputation
	numYes = yes.cardinality();
	numNo = no.cardinality();
	mainLog.println("target=" + target.cardinality() + ", yes=" + numYes + ", no=" + numNo + ", maybe=" + (n - (numYes + numNo)));

	// If still required, store strategy for no/yes (0/1) states.
	// This is just for the cases max=0 and min=1, where arbitrary choices suffice (denoted by -2)
	if (genStrat || exportAdv) {
		if (min) {
			for (i = yes.nextSetBit(0); i >= 0; i = yes.nextSetBit(i + 1)) {
				if (!target.get(i))
					strat[i] = -2;
			}
		} else {
			for (i = no.nextSetBit(0); i >= 0; i = no.nextSetBit(i + 1)) {
				strat[i] = -2;
			}
		}
	}

	// Compute probabilities (if needed), for the remaining "maybe" states,
	// using the numerical method selected in the settings
	if (numYes + numNo < n) {
		switch (mdpSolnMethod) {
		case VALUE_ITERATION:
			res = computeReachProbsValIter(mdp, no, yes, min, init, known, strat);
			break;
		case GAUSS_SEIDEL:
			res = computeReachProbsGaussSeidel(mdp, no, yes, min, init, known, strat);
			break;
		case POLICY_ITERATION:
			res = computeReachProbsPolIter(mdp, no, yes, min, strat);
			break;
		case MODIFIED_POLICY_ITERATION:
			res = computeReachProbsModPolIter(mdp, no, yes, min, strat);
			break;
		default:
			throw new PrismException("Unknown MDP solution method " + mdpSolnMethod.fullName());
		}
	} else {
		// Precomputation was conclusive for all states; no numerical solution needed
		res = new ModelCheckerResult();
		res.soln = Utils.bitsetToDoubleArray(yes, n);
	}

	// Finished probabilistic reachability
	timer = System.currentTimeMillis() - timer;
	mainLog.println("Probabilistic reachability took " + timer / 1000.0 + " seconds.");

	// Store strategy
	if (genStrat) {
		res.strat = new MDStrategyArray(mdp, strat);
	}
	// Export adversary
	if (exportAdv) {
		// Prune strategy
		restrictStrategyToReachableStates(mdp, strat);
		// Print strategy
		mainLog.print("Strat:");
		for (i = 0; i < n; i++) {
			mainLog.print(" " + i + ":" + strat[i]);
		}
		mainLog.println();
		// Export
		PrismLog out = new PrismFileLog(exportAdvFilename);
		new DTMCFromMDPMemorylessAdversary(mdp, strat).exportToPrismExplicitTra(out);
		out.close();
	}

	// Update time taken
	res.timeTaken = timer / 1000.0;
	res.timeProb0 = timerProb0 / 1000.0;
	res.timePre = (timerProb0 + timerProb1) / 1000.0;

	return res;
}

/**
 * Prob0 precomputation algorithm.
 * i.e. determine the states of an MDP which, with min/max probability 0,
 * reach a state in {@code target}, while remaining in those in {@code remain}.
* {@code min}=true gives Prob0E, {@code min}=false gives Prob0A. * Optionally, for min only, store optimal (memoryless) strategy info for 0 states. * @param mdp The MDP * @param remain Remain in these states (optional: null means "all") * @param target Target states * @param min Min or max probabilities (true=min, false=max) * @param strat Storage for (memoryless) strategy choice indices (ignored if null) */ public BitSet prob0(MDP mdp, BitSet remain, BitSet target, boolean min, int strat[]) { int n, iters; BitSet u, soln, unknown; boolean u_done; long timer; // Start precomputation timer = System.currentTimeMillis(); mainLog.println("Starting Prob0 (" + (min ? "min" : "max") + ")..."); // Special case: no target states if (target.cardinality() == 0) { soln = new BitSet(mdp.getNumStates()); soln.set(0, mdp.getNumStates()); return soln; } // Initialise vectors n = mdp.getNumStates(); u = new BitSet(n); soln = new BitSet(n); // Determine set of states actually need to perform computation for unknown = new BitSet(); unknown.set(0, n); unknown.andNot(target); if (remain != null) unknown.and(remain); // Fixed point loop iters = 0; u_done = false; // Least fixed point - should start from 0 but we optimise by // starting from 'target', thus bypassing first iteration u.or(target); soln.or(target); while (!u_done) { iters++; // Single step of Prob0 mdp.prob0step(unknown, u, min, soln); // Check termination u_done = soln.equals(u); // u = soln u.clear(); u.or(soln); } // Negate u.flip(0, n); // Finished precomputation timer = System.currentTimeMillis() - timer; mainLog.print("Prob0 (" + (min ? "min" : "max") + ")"); mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds."); // If required, generate strategy. This is for min probs, // so it can be done *after* the main prob0 algorithm (unlike for prob1). 
// We simply pick, for all "no" states, the first choice for which all transitions stay in "no" if (strat != null) { for (int i = u.nextSetBit(0); i >= 0; i = u.nextSetBit(i + 1)) { int numChoices = mdp.getNumChoices(i); for (int k = 0; k < numChoices; k++) { if (mdp.allSuccessorsInSet(i, k, u)) { strat[i] = k; continue; } } } } return u; } /** * Prob1 precomputation algorithm. * i.e. determine the states of an MDP which, with min/max probability 1, * reach a state in {@code target}, while remaining in those in @{code remain}. * {@code min}=true gives Prob1A, {@code min}=false gives Prob1E. * Optionally, for max only, store optimal (memoryless) strategy info for 1 states. * @param mdp The MDP * @param remain Remain in these states (optional: null means "all") * @param target Target states * @param min Min or max probabilities (true=min, false=max) * @param strat Storage for (memoryless) strategy choice indices (ignored if null) */ public BitSet prob1(MDP mdp, BitSet remain, BitSet target, boolean min, int strat[]) { int n, iters; BitSet u, v, soln, unknown; boolean u_done, v_done; long timer; // Start precomputation timer = System.currentTimeMillis(); mainLog.println("Starting Prob1 (" + (min ? 
"min" : "max") + ")...");

	// Special case: no target states (no state reaches target with probability 1)
	if (target.cardinality() == 0) {
		return new BitSet(mdp.getNumStates());
	}

	// Initialise vectors
	n = mdp.getNumStates();
	u = new BitSet(n);
	v = new BitSet(n);
	soln = new BitSet(n);

	// Determine set of states actually need to perform computation for
	unknown = new BitSet();
	unknown.set(0, n);
	unknown.andNot(target);
	if (remain != null)
		unknown.and(remain);

	// Nested fixed point loop
	// (outer: greatest fixed point over u; inner: least fixed point over v)
	iters = 0;
	u_done = false;
	// Greatest fixed point
	u.set(0, n);
	while (!u_done) {
		v_done = false;
		// Least fixed point - should start from 0 but we optimise by
		// starting from 'target', thus bypassing first iteration
		v.clear();
		v.or(target);
		soln.clear();
		soln.or(target);
		while (!v_done) {
			iters++;
			// Single step of Prob1
			if (min)
				mdp.prob1Astep(unknown, u, v, soln);
			else
				mdp.prob1Estep(unknown, u, v, soln, null);
			// Check termination (inner)
			v_done = soln.equals(v);
			// v = soln
			v.clear();
			v.or(soln);
		}
		// Check termination (outer)
		u_done = v.equals(u);
		// u = v
		u.clear();
		u.or(v);
	}

	// If we need to generate a strategy, do another iteration of the inner loop for this
	// We could do this during the main double fixed point above, but we would generate surplus
	// strategy info for non-1 states during early iterations of the outer loop,
	// which are not straightforward to remove since this method does not know which states
	// already have valid strategy info from Prob0.
	// Notice that we only need to look at states in u (since we already know the answer),
	// so we restrict 'unknown' further
	unknown.and(u);
	if (!min && strat != null) {
		v_done = false;
		v.clear();
		v.or(target);
		soln.clear();
		soln.or(target);
		while (!v_done) {
			mdp.prob1Estep(unknown, u, v, soln, strat);
			v_done = soln.equals(v);
			v.clear();
			v.or(soln);
		}
		u_done = v.equals(u);
	}

	// Finished precomputation
	timer = System.currentTimeMillis() - timer;
	mainLog.print("Prob1 (" + (min ? "min" : "max") + ")");
	mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");

	return u;
}

/**
 * Compute reachability probabilities using value iteration.
 * Optionally, store optimal (memoryless) strategy info.
 * @param mdp The MDP
 * @param no Probability 0 states
 * @param yes Probability 1 states
 * @param min Min or max probabilities (true=min, false=max)
 * @param init Optionally, an initial solution vector (will be overwritten)
 * @param known Optionally, a set of states for which the exact answer is known
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
 */
protected ModelCheckerResult computeReachProbsValIter(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[]) throws PrismException
{
	ModelCheckerResult res;
	BitSet unknown;
	int i, n, iters;
	double soln[], soln2[], tmpsoln[], initVal;
	boolean done;
	long timer;

	// Start value iteration
	timer = System.currentTimeMillis();
	mainLog.println("Starting value iteration (" + (min ? "min" : "max") + ")...");

	// Store num states
	n = mdp.getNumStates();

	// Create solution vector(s)
	soln = new double[n];
	soln2 = (init == null) ? new double[n] : init;

	// Initialise solution vectors. Use (where available) the following in order of preference:
	// (1) exact answer, if already known; (2) 1.0/0.0 if in yes/no; (3) passed in initial value; (4) initVal
	// where initVal is 0.0 or 1.0, depending on whether we converge from below/above.
	initVal = (valIterDir == ValIterDir.BELOW) ? 0.0 : 1.0;
	if (init != null) {
		if (known != null) {
			for (i = 0; i < n; i++)
				soln[i] = soln2[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
		} else {
			for (i = 0; i < n; i++)
				soln[i] = soln2[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
		}
	} else {
		for (i = 0; i < n; i++)
			soln[i] = soln2[i] = yes.get(i) ? 1.0 : no.get(i) ?
0.0 : initVal;
	}

	// Determine set of states actually need to compute values for
	unknown = new BitSet();
	unknown.set(0, n);
	unknown.andNot(yes);
	unknown.andNot(no);
	if (known != null)
		unknown.andNot(known);

	// Start iterations
	iters = 0;
	done = false;
	while (!done && iters < maxIters) {
		iters++;
		// Matrix-vector multiply and min/max ops
		mdp.mvMultMinMax(soln, min, soln2, unknown, false, strat);
		// Check termination
		done = PrismUtils.doublesAreClose(soln, soln2, termCritParam, termCrit == TermCrit.ABSOLUTE);
		// Swap vectors for next iter
		tmpsoln = soln;
		soln = soln2;
		soln2 = tmpsoln;
	}

	// Finished value iteration
	timer = System.currentTimeMillis() - timer;
	mainLog.print("Value iteration (" + (min ? "min" : "max") + ")");
	mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");

	// Non-convergence is an error (usually)
	if (!done && errorOnNonConverge) {
		String msg = "Iterative method did not converge within " + iters + " iterations.";
		msg += "\nConsider using a different numerical method or increasing the maximum number of iterations";
		throw new PrismException(msg);
	}

	// Return results
	res = new ModelCheckerResult();
	res.soln = soln;
	res.numIters = iters;
	res.timeTaken = timer / 1000.0;
	return res;
}

/**
 * Compute reachability probabilities using Gauss-Seidel (including Jacobi-style updates).
 * @param mdp The MDP
 * @param no Probability 0 states
 * @param yes Probability 1 states
 * @param min Min or max probabilities (true=min, false=max)
 * @param init Optionally, an initial solution vector (will be overwritten)
 * @param known Optionally, a set of states for which the exact answer is known
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
 */
protected ModelCheckerResult computeReachProbsGaussSeidel(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[]) throws PrismException
{
	ModelCheckerResult res;
	BitSet unknown;
	int i, n, iters;
	double soln[], initVal, maxDiff;
	boolean done;
	long timer;

	// Start value iteration
	timer = System.currentTimeMillis();
	mainLog.println("Starting Gauss-Seidel (" + (min ? "min" : "max") + ")...");

	// Store num states
	n = mdp.getNumStates();

	// Create solution vector
	// (Gauss-Seidel updates in place, so only one vector is needed)
	soln = (init == null) ? new double[n] : init;

	// Initialise solution vector. Use (where available) the following in order of preference:
	// (1) exact answer, if already known; (2) 1.0/0.0 if in yes/no; (3) passed in initial value; (4) initVal
	// where initVal is 0.0 or 1.0, depending on whether we converge from below/above.
	initVal = (valIterDir == ValIterDir.BELOW) ? 0.0 : 1.0;
	if (init != null) {
		if (known != null) {
			for (i = 0; i < n; i++)
				soln[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
		} else {
			for (i = 0; i < n; i++)
				soln[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
		}
	} else {
		for (i = 0; i < n; i++)
			soln[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : initVal;
	}

	// Determine set of states actually need to compute values for
	unknown = new BitSet();
	unknown.set(0, n);
	unknown.andNot(yes);
	unknown.andNot(no);
	if (known != null)
		unknown.andNot(known);

	// Start iterations
	iters = 0;
	done = false;
	while (!done && iters < maxIters) {
		iters++;
		// Matrix-vector multiply
		maxDiff = mdp.mvMultGSMinMax(soln, min, unknown, false, termCrit == TermCrit.ABSOLUTE, strat);
		// Check termination
		done = maxDiff < termCritParam;
	}

	// Finished Gauss-Seidel
	timer = System.currentTimeMillis() - timer;
	mainLog.print("Gauss-Seidel");
	mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");

	// Non-convergence is an error (usually)
	if (!done && errorOnNonConverge) {
		String msg = "Iterative method did not converge within " + iters + " iterations.";
		msg += "\nConsider using a different numerical method or increasing the maximum number of iterations";
		throw new PrismException(msg);
	}

	// Return results
	res = new ModelCheckerResult();
	res.soln = soln;
	res.numIters = iters;
	res.timeTaken = timer / 1000.0;
	return res;
}

/**
 * Compute reachability probabilities using policy iteration.
 * Optionally, store optimal (memoryless) strategy info.
 * @param mdp The MDP
 * @param no Probability 0 states
 * @param yes Probability 1 states
 * @param min Min or max probabilities (true=min, false=max)
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 */
protected ModelCheckerResult computeReachProbsPolIter(MDP mdp, BitSet no, BitSet yes, boolean min, int strat[]) throws PrismException
{
	ModelCheckerResult res;
	int i, n, iters, totalIters;
	double soln[], soln2[];
	boolean done;
	long timer;
	DTMCModelChecker mcDTMC;
	DTMC dtmc;

	// Re-use solution to solve each new policy (strategy)?
	boolean reUseSoln = true;

	// Start policy iteration
	timer = System.currentTimeMillis();
	mainLog.println("Starting policy iteration (" + (min ?
"min" : "max") + ")..."); // Create a DTMC model checker (for solving policies) mcDTMC = new DTMCModelChecker(this); mcDTMC.inheritSettings(this); mcDTMC.setLog(new PrismDevNullLog()); // Store num states n = mdp.getNumStates(); // Create solution vectors soln = new double[n]; soln2 = new double[n]; // Initialise solution vectors. for (i = 0; i < n; i++) soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0; // If not passed in, create new storage for strategy and initialise // Initial strategy just picks first choice (0) everywhere if (strat == null) { strat = new int[n]; for (i = 0; i < n; i++) strat[i] = 0; } // Otherwise, just initialise for states not in yes/no // (Optimal choices for yes/no should already be known) else { for (i = 0; i < n; i++) if (!(no.get(i) || yes.get(i))) strat[i] = 0; } // Start iterations iters = totalIters = 0; done = false; while (!done) { iters++; // Solve induced DTMC for strategy dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat); res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, reUseSoln ? 
soln : null, null); soln = res.soln; totalIters += res.numIters; // Check if optimal, improve non-optimal choices mdp.mvMultMinMax(soln, min, soln2, null, false, null); done = true; for (i = 0; i < n; i++) { // Don't look at no/yes states - we may not have strategy info for them, // so they might appear non-optimal if (no.get(i) || yes.get(i)) continue; if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) { done = false; List opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]); // Only update strategy if strictly better if (!opt.contains(strat[i])) strat[i] = opt.get(0); } } } // Finished policy iteration timer = System.currentTimeMillis() - timer; mainLog.print("Policy iteration"); mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds."); // Return results res = new ModelCheckerResult(); res.soln = soln; res.numIters = totalIters; res.timeTaken = timer / 1000.0; return res; } /** * Compute reachability probabilities using modified policy iteration. * @param mdp: The MDP * @param no: Probability 0 states * @param yes: Probability 1 states * @param min: Min or max probabilities (true=min, false=max) * @param strat Storage for (memoryless) strategy choice indices (ignored if null) */ protected ModelCheckerResult computeReachProbsModPolIter(MDP mdp, BitSet no, BitSet yes, boolean min, int strat[]) throws PrismException { ModelCheckerResult res; int i, n, iters, totalIters; double soln[], soln2[]; boolean done; long timer; DTMCModelChecker mcDTMC; DTMC dtmc; // Start value iteration timer = System.currentTimeMillis(); mainLog.println("Starting modified policy iteration (" + (min ? 
"min" : "max") + ")..."); // Create a DTMC model checker (for solving policies) mcDTMC = new DTMCModelChecker(this); mcDTMC.inheritSettings(this); mcDTMC.setLog(new PrismDevNullLog()); // Limit iters for DTMC solution - this implements "modified" policy iteration mcDTMC.setMaxIters(100); mcDTMC.setErrorOnNonConverge(false); // Store num states n = mdp.getNumStates(); // Create solution vectors soln = new double[n]; soln2 = new double[n]; // Initialise solution vectors. for (i = 0; i < n; i++) soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0; // If not passed in, create new storage for strategy and initialise // Initial strategy just picks first choice (0) everywhere if (strat == null) { strat = new int[n]; for (i = 0; i < n; i++) strat[i] = 0; } // Otherwise, just initialise for states not in yes/no // (Optimal choices for yes/no should already be known) else { for (i = 0; i < n; i++) if (!(no.get(i) || yes.get(i))) strat[i] = 0; } // Start iterations iters = totalIters = 0; done = false; while (!done) { iters++; // Solve induced DTMC for strategy dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat); res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, soln, null); soln = res.soln; totalIters += res.numIters; // Check if optimal, improve non-optimal choices mdp.mvMultMinMax(soln, min, soln2, null, false, null); done = true; for (i = 0; i < n; i++) { // Don't look at no/yes states - we don't store strategy info for them, // so they might appear non-optimal if (no.get(i) || yes.get(i)) continue; if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) { done = false; List opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]); strat[i] = opt.get(0); } } } // Finished policy iteration timer = System.currentTimeMillis() - timer; mainLog.print("Modified policy iteration"); mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds."); // Return results res = new 
ModelCheckerResult(); res.soln = soln; res.numIters = totalIters; res.timeTaken = timer / 1000.0; return res; } /** * Construct strategy information for min/max reachability probabilities. * (More precisely, list of indices of choices resulting in min/max.) * (Note: indices are guaranteed to be sorted in ascending order.) * @param mdp The MDP * @param state The state to generate strategy info for * @param target The set of target states to reach * @param min Min or max probabilities (true=min, false=max) * @param lastSoln Vector of values from which to recompute in one iteration */ public List probReachStrategy(MDP mdp, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException { double val = mdp.mvMultMinMaxSingle(state, lastSoln, min, null); return mdp.mvMultMinMaxSingleChoices(state, lastSoln, min, val); } /** * Compute bounded reachability probabilities. * i.e. compute the min/max probability of reaching a state in {@code target} within k steps. * @param mdp The MDP * @param target Target states * @param k Bound * @param min Min or max probabilities (true=min, false=max) */ public ModelCheckerResult computeBoundedReachProbs(MDP mdp, BitSet target, int k, boolean min) throws PrismException { return computeBoundedReachProbs(mdp, null, target, k, min, null, null); } /** * Compute bounded until probabilities. * i.e. compute the min/max probability of reaching a state in {@code target}, * within k steps, and while remaining in states in @{code remain}. * @param mdp The MDP * @param remain Remain in these states (optional: null means "all") * @param target Target states * @param k Bound * @param min Min or max probabilities (true=min, false=max) */ public ModelCheckerResult computeBoundedUntilProbs(MDP mdp, BitSet remain, BitSet target, int k, boolean min) throws PrismException { return computeBoundedReachProbs(mdp, remain, target, k, min, null, null); } /** * Compute bounded reachability/until probabilities. * i.e. 
compute the min/max probability of reaching a state in {@code target},
 * within k steps, and while remaining in states in {@code remain}.
 * @param mdp The MDP
 * @param remain Remain in these states (optional: null means "all")
 * @param target Target states
 * @param k Bound
 * @param min Min or max probabilities (true=min, false=max)
 * @param init Optionally, an initial solution vector (may be overwritten)
 * @param results Optional array of size k+1 to store (init state) results for each step (null if unused)
 */
public ModelCheckerResult computeBoundedReachProbs(MDP mdp, BitSet remain, BitSet target, int k, boolean min, double init[], double results[]) throws PrismException
{
	ModelCheckerResult res = null;
	BitSet unknown;
	int i, n, iters;
	double soln[], soln2[], tmpsoln[];
	long timer;

	// Start bounded probabilistic reachability
	timer = System.currentTimeMillis();
	mainLog.println("\nStarting bounded probabilistic reachability (" + (min ? "min" : "max") + ")...");

	// Store num states
	n = mdp.getNumStates();

	// Create solution vector(s)
	soln = new double[n];
	soln2 = (init == null) ? new double[n] : init;

	// Initialise solution vectors. Use passed in initial vector, if present
	if (init != null) {
		for (i = 0; i < n; i++)
			soln[i] = soln2[i] = target.get(i) ? 1.0 : init[i];
	} else {
		for (i = 0; i < n; i++)
			soln[i] = soln2[i] = target.get(i) ? 1.0 : 0.0;
	}

	// Store intermediate results if required
	// (compute min/max value over initial states for first step)
	if (results != null) {
		// TODO: whether this is min or max should be specified somehow
		results[0] = Utils.minMaxOverArraySubset(soln2, mdp.getInitialStates(), true);
	}

	// Determine set of states actually need to perform computation for
	unknown = new BitSet();
	unknown.set(0, n);
	unknown.andNot(target);
	if (remain != null)
		unknown.and(remain);

	// Start iterations (exactly k steps of matrix-vector multiplication)
	iters = 0;
	while (iters < k) {
		iters++;
		// Matrix-vector multiply and min/max ops
		mdp.mvMultMinMax(soln, min, soln2, unknown, false, null);
		// Store intermediate results if required
		// (compute min/max value over initial states for this step)
		if (results != null) {
			// TODO: whether this is min or max should be specified somehow
			results[iters] = Utils.minMaxOverArraySubset(soln2, mdp.getInitialStates(), true);
		}
		// Swap vectors for next iter
		tmpsoln = soln;
		soln = soln2;
		soln2 = tmpsoln;
	}

	// Finished bounded probabilistic reachability
	timer = System.currentTimeMillis() - timer;
	mainLog.print("Bounded probabilistic reachability (" + (min ? "min" : "max") + ")");
	mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");

	// Return results
	res = new ModelCheckerResult();
	res.soln = soln;
	res.lastSoln = soln2;
	res.numIters = iters;
	res.timeTaken = timer / 1000.0;
	res.timePre = 0.0;
	return res;
}

/**
 * Compute expected cumulative (step-bounded) rewards.
 * i.e. compute the min/max reward accumulated within {@code k} steps.
* @param mdp The MDP * @param mdpRewards The rewards * @param target Target states * @param min Min or max rewards (true=min, false=max) */ public ModelCheckerResult computeCumulativeRewards(MDP mdp, MDPRewards mdpRewards, int k, boolean min) throws PrismException { ModelCheckerResult res = null; int i, n, iters; long timer; double soln[], soln2[], tmpsoln[]; // Start expected cumulative reward timer = System.currentTimeMillis(); mainLog.println("\nStarting expected cumulative reward (" + (min ? "min" : "max") + ")..."); // Store num states n = mdp.getNumStates(); // Create/initialise solution vector(s) soln = new double[n]; soln2 = new double[n]; for (i = 0; i < n; i++) soln[i] = soln2[i] = 0.0; // Start iterations iters = 0; while (iters < k) { iters++; // Matrix-vector multiply and min/max ops int strat[] = new int[n]; mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, strat); mainLog.println(strat); // Swap vectors for next iter tmpsoln = soln; soln = soln2; soln2 = tmpsoln; } // Finished value iteration timer = System.currentTimeMillis() - timer; mainLog.print("Expected cumulative reward (" + (min ? "min" : "max") + ")"); mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds."); // Return results res = new ModelCheckerResult(); res.soln = soln; res.numIters = iters; res.timeTaken = timer / 1000.0; return res; } /** * Compute expected reachability rewards. * @param mdp The MDP * @param mdpRewards The rewards * @param target Target states * @param min Min or max rewards (true=min, false=max) */ public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min) throws PrismException { return computeReachRewards(mdp, mdpRewards, target, min, null, null); } /** * Compute expected reachability rewards. * i.e. compute the min/max reward accumulated to reach a state in {@code target}. 
* @param mdp The MDP * @param mdpRewards The rewards * @param target Target states * @param min Min or max rewards (true=min, false=max) * @param init Optionally, an initial solution vector (may be overwritten) * @param known Optionally, a set of states for which the exact answer is known * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values). * Also, 'known' values cannot be passed for some solution methods, e.g. policy iteration. */ public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min, double init[], BitSet known) throws PrismException { ModelCheckerResult res = null; BitSet inf; int i, n, numTarget, numInf; long timer, timerProb1; int strat[] = null; // Local copy of setting MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod; // Switch to a supported method, if necessary if (!(mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION || mdpSolnMethod == MDPSolnMethod.GAUSS_SEIDEL || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION)) { mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL; mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\""); } // Check for some unsupported combinations if (mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) { if (known != null) { throw new PrismException("Policy iteration methods cannot be passed 'known' values for some states"); } } // Start expected reachability timer = System.currentTimeMillis(); mainLog.println("\nStarting expected reachability (" + (min ? "min" : "max") + ")..."); // Check for deadlocks in non-target state (because breaks e.g. 
prob1) mdp.checkForDeadlocks(target); // Store num states n = mdp.getNumStates(); // Optimise by enlarging target set (if more info is available) if (init != null && known != null) { BitSet targetNew = new BitSet(n); for (i = 0; i < n; i++) { targetNew.set(i, target.get(i) || (known.get(i) && init[i] == 0.0)); } target = targetNew; } // If required, create/initialise strategy storage // Set choices to -1, denoting unknown // (except for target states, which are -2, denoting arbitrary) if (genStrat || exportAdv || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) { strat = new int[n]; for (i = 0; i < n; i++) { strat[i] = target.get(i) ? -2 : -1; } } // Precomputation (not optional) timerProb1 = System.currentTimeMillis(); inf = prob1(mdp, null, target, !min, strat); inf.flip(0, n); timerProb1 = System.currentTimeMillis() - timerProb1; // Print results of precomputation numTarget = target.cardinality(); numInf = inf.cardinality(); mainLog.println("target=" + numTarget + ", inf=" + numInf + ", rest=" + (n - (numTarget + numInf))); // If required, generate strategy for "inf" states. if (genStrat || exportAdv || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) { if (min) { // If min reward is infinite, all choices give infinity // So the choice can be arbitrary, denoted by -2; for (i = inf.nextSetBit(0); i >= 0; i = inf.nextSetBit(i + 1)) { strat[i] = -2; } } else { // If max reward is infinite, there is at least one choice giving infinity. // So we pick, for all "inf" states, the first choice for which some transitions stays in "inf". 
for (i = inf.nextSetBit(0); i >= 0; i = inf.nextSetBit(i + 1)) { int numChoices = mdp.getNumChoices(i); for (int k = 0; k < numChoices; k++) { if (mdp.allSuccessorsInSet(i, k, inf)) { strat[i] = k; continue; } } } } } // Compute rewards switch (mdpSolnMethod) { case VALUE_ITERATION: res = computeReachRewardsValIter(mdp, mdpRewards, target, inf, min, init, known, strat); break; case GAUSS_SEIDEL: res = computeReachRewardsGaussSeidel(mdp, mdpRewards, target, inf, min, init, known, strat); break; case POLICY_ITERATION: res = computeReachRewardsPolIter(mdp, mdpRewards, target, inf, min, strat); break; default: throw new PrismException("Unknown MDP solution method " + mdpSolnMethod.fullName()); } // Store strategy if (genStrat) { res.strat = new MDStrategyArray(mdp, strat); } // Export adversary if (exportAdv) { // Prune strategy restrictStrategyToReachableStates(mdp, strat); // Print strategy mainLog.print("Strat:"); for (i = 0; i < n; i++) { mainLog.print(" " + i + ":" + strat[i]); } mainLog.println(); // Export PrismLog out = new PrismFileLog(exportAdvFilename); new DTMCFromMDPMemorylessAdversary(mdp, strat).exportToPrismExplicitTra(out); out.close(); } // Finished expected reachability timer = System.currentTimeMillis() - timer; mainLog.println("Expected reachability took " + timer / 1000.0 + " seconds."); // Update time taken res.timeTaken = timer / 1000.0; res.timePre = timerProb1 / 1000.0; return res; } /** * Compute expected reachability rewards using value iteration. * Optionally, store optimal (memoryless) strategy info. 
* @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param inf States for which reward is infinite
	 * @param min Min or max rewards (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten)
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
	 */
	protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[], BitSet known, int strat[]) throws PrismException
	{
		ModelCheckerResult res;
		BitSet unknown;
		int i, n, iters;
		double soln[], soln2[], tmpsoln[];
		boolean done;
		long timer;

		// Start value iteration
		timer = System.currentTimeMillis();
		mainLog.println("Starting value iteration (" + (min ? "min" : "max") + ")...");

		// Store num states
		n = mdp.getNumStates();

		// Create solution vector(s)
		// (soln2 aliases the caller-supplied init vector, if given, so init is overwritten)
		soln = new double[n];
		soln2 = (init == null) ? new double[n] : init;

		// Initialise solution vectors. Use (where available) the following in order of preference:
		// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) passed in initial value; (4) 0.0
		if (init != null) {
			if (known != null) {
				for (i = 0; i < n; i++)
					soln[i] = soln2[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
			} else {
				for (i = 0; i < n; i++)
					soln[i] = soln2[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
			}
		} else {
			for (i = 0; i < n; i++)
				soln[i] = soln2[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;
		}

		// Determine set of states actually need to compute values for
		// (everything except target/inf states, and "known" states if given)
		unknown = new BitSet();
		unknown.set(0, n);
		unknown.andNot(target);
		unknown.andNot(inf);
		if (known != null)
			unknown.andNot(known);

		// Start iterations
		iters = 0;
		done = false;
		while (!done && iters < maxIters) {
			//mainLog.println(soln);
			iters++;
			// Matrix-vector multiply and min/max ops
			mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, unknown, false, strat);
			// Check termination
			done = PrismUtils.doublesAreClose(soln, soln2, termCritParam, termCrit == TermCrit.ABSOLUTE);
			// Swap vectors for next iter
			tmpsoln = soln;
			soln = soln2;
			soln2 = tmpsoln;
		}

		// Finished value iteration
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Value iteration (" + (min ? "min" : "max") + ")");
		mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");

		// Non-convergence is an error (usually)
		if (!done && errorOnNonConverge) {
			String msg = "Iterative method did not converge within " + iters + " iterations.";
			msg += "\nConsider using a different numerical method or increasing the maximum number of iterations";
			throw new PrismException(msg);
		}

		// Return results
		res = new ModelCheckerResult();
		res.soln = soln;
		res.numIters = iters;
		res.timeTaken = timer / 1000.0;
		return res;
	}

	/**
	 * Compute expected reachability rewards using Gauss-Seidel (including Jacobi-style updates).
	 * Optionally, store optimal (memoryless) strategy info.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param inf States for which reward is infinite
	 * @param min Min or max rewards (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten)
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
	protected ModelCheckerResult computeReachRewardsGaussSeidel(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[], BitSet known, int strat[]) throws PrismException
	{
		ModelCheckerResult res;
		BitSet unknown;
		int i, n, iters;
		double soln[], maxDiff;
		boolean done;
		long timer;

		// Start value iteration
		timer = System.currentTimeMillis();
		mainLog.println("Starting Gauss-Seidel (" + (min ? "min" : "max") + ")...");

		// Store num states
		n = mdp.getNumStates();

		// Create solution vector(s)
		// (Gauss-Seidel updates in place, so a single vector suffices;
		// it aliases the caller-supplied init vector, if given)
		soln = (init == null) ? new double[n] : init;

		// Initialise solution vector. Use (where available) the following in order of preference:
		// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) passed in initial value; (4) 0.0
		if (init != null) {
			if (known != null) {
				for (i = 0; i < n; i++)
					soln[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
			} else {
				for (i = 0; i < n; i++)
					soln[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
			}
		} else {
			for (i = 0; i < n; i++)
				soln[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;
		}

		// Determine set of states actually need to compute values for
		unknown = new BitSet();
		unknown.set(0, n);
		unknown.andNot(target);
		unknown.andNot(inf);
		if (known != null)
			unknown.andNot(known);

		// Start iterations
		iters = 0;
		done = false;
		while (!done && iters < maxIters) {
			//mainLog.println(soln);
			iters++;
			// Matrix-vector multiply and min/max ops (in place, returning max difference)
			maxDiff = mdp.mvMultRewGSMinMax(soln, mdpRewards, min, unknown, false, termCrit == TermCrit.ABSOLUTE, strat);
			// Check termination
			done = maxDiff < termCritParam;
		}

		// Finished Gauss-Seidel
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Gauss-Seidel (" + (min ? "min" : "max") + ")");
		mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");

		// Non-convergence is an error (usually)
		if (!done && errorOnNonConverge) {
			String msg = "Iterative method did not converge within " + iters + " iterations.";
			msg += "\nConsider using a different numerical method or increasing the maximum number of iterations";
			throw new PrismException(msg);
		}

		// Return results
		res = new ModelCheckerResult();
		res.soln = soln;
		res.numIters = iters;
		res.timeTaken = timer / 1000.0;
		return res;
	}

	/**
	 * Compute expected reachability rewards using policy iteration.
	 * The array {@code strat} is used both to pass in the initial strategy for policy iteration,
	 * and as storage for the resulting optimal strategy (if needed).
	 * Passing in an initial strategy is required when some states have infinite reward,
	 * to avoid the possibility of policy iteration getting stuck on an infinite-value strategy.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param inf States for which reward is infinite
	 * @param min Min or max rewards (true=min, false=max)
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 */
	protected ModelCheckerResult computeReachRewardsPolIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, int strat[]) throws PrismException
	{
		ModelCheckerResult res;
		int i, n, iters, totalIters;
		double soln[], soln2[];
		boolean done;
		long timer;
		DTMCModelChecker mcDTMC;
		DTMC dtmc;
		MCRewards mcRewards;

		// Re-use solution to solve each new policy (strategy)?
		boolean reUseSoln = true;

		// Start policy iteration
		timer = System.currentTimeMillis();
		mainLog.println("Starting policy iteration (" + (min ? "min" : "max") + ")...");

		// Create a DTMC model checker (for solving policies)
		// (its log output is suppressed via a dev/null log)
		mcDTMC = new DTMCModelChecker(this);
		mcDTMC.inheritSettings(this);
		mcDTMC.setLog(new PrismDevNullLog());

		// Store num states
		n = mdp.getNumStates();

		// Create solution vector(s)
		soln = new double[n];
		soln2 = new double[n];

		// Initialise solution vectors.
		for (i = 0; i < n; i++)
			soln[i] = soln2[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;

		// If not passed in, create new storage for strategy and initialise
		// Initial strategy just picks first choice (0) everywhere
		if (strat == null) {
			strat = new int[n];
			for (i = 0; i < n; i++)
				strat[i] = 0;
		}

		// Start iterations
		iters = totalIters = 0;
		done = false;
		while (!done && iters < maxIters) {
			iters++;
			// Solve induced DTMC for strategy
			dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
			mcRewards = new MCRewardsFromMDPRewards(mdpRewards, strat);
			res = mcDTMC.computeReachRewardsValIter(dtmc, mcRewards, target, inf, reUseSoln ? soln : null, null);
			soln = res.soln;
			totalIters += res.numIters;
			// Check if optimal, improve non-optimal choices
			mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, null);
			done = true;
			for (i = 0; i < n; i++) {
				// Don't look at target/inf states - we may not have strategy info for them,
				// so they might appear non-optimal
				if (target.get(i) || inf.get(i))
					continue;
				if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
					done = false;
					List<Integer> opt = mdp.mvMultRewMinMaxSingleChoices(i, soln, mdpRewards, min, soln2[i]);
					// Only update strategy if strictly better
					if (!opt.contains(strat[i]))
						strat[i] = opt.get(0);
				}
			}
		}

		// Finished policy iteration
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Policy iteration");
		mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");

		// Return results
		res = new ModelCheckerResult();
		res.soln = soln;
		res.numIters = totalIters;
res.timeTaken = timer / 1000.0; return res; } /** * Construct strategy information for min/max expected reachability. * (More precisely, list of indices of choices resulting in min/max.) * (Note: indices are guaranteed to be sorted in ascending order.) * @param mdp The MDP * @param mdpRewards The rewards * @param state The state to generate strategy info for * @param target The set of target states to reach * @param min Min or max rewards (true=min, false=max) * @param lastSoln Vector of values from which to recompute in one iteration */ public List expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException { double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, mdpRewards, min, null); return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val); } /** * Restrict a (memoryless) strategy for an MDP, stored as an integer array of choice indices, * to the states of the MDP that are reachable under that strategy. * @param mdp The MDP * @param strat The strategy */ public void restrictStrategyToReachableStates(MDP mdp, int strat[]) { BitSet restrict = new BitSet(); BitSet explore = new BitSet(); // Get initial states for (int is : mdp.getInitialStates()) { restrict.set(is); explore.set(is); } // Compute reachable states (store in 'restrict') boolean foundMore = true; while (foundMore) { foundMore = false; for (int s = explore.nextSetBit(0); s >= 0; s = explore.nextSetBit(s + 1)) { explore.set(s, false); if (strat[s] >= 0) { Iterator> iter = mdp.getTransitionsIterator(s, strat[s]); while (iter.hasNext()) { Map.Entry e = iter.next(); int dest = e.getKey(); if (!restrict.get(dest)) { foundMore = true; restrict.set(dest); explore.set(dest); } } } } } // Set strategy choice for non-reachable state to -1 int n = mdp.getNumStates(); for (int s = restrict.nextClearBit(0); s < n; s = restrict.nextClearBit(s + 1)) { strat[s] = -3; } } /** * Simple test program. 
*/
	public static void main(String args[])
	{
		MDPModelChecker mc;
		MDPSimple mdp;
		ModelCheckerResult res;
		BitSet init, target;
		Map<String, BitSet> labels;
		boolean min = true;
		try {
			// Build the MDP and labels from the explicit-state files given on the command line
			mc = new MDPModelChecker(null);
			mdp = new MDPSimple();
			mdp.buildFromPrismExplicit(args[0]);
			//System.out.println(mdp);
			labels = mc.loadLabelsFile(args[1]);
			//System.out.println(labels);
			init = labels.get("init");
			target = labels.get(args[2]);
			if (target == null)
				throw new PrismException("Unknown label \"" + args[2] + "\"");
			// Parse any remaining command-line switches
			for (int i = 3; i < args.length; i++) {
				if (args[i].equals("-min"))
					min = true;
				else if (args[i].equals("-max"))
					min = false;
				else if (args[i].equals("-nopre"))
					mc.setPrecomp(false);
			}
			res = mc.computeReachProbs(mdp, target, min);
			// Print the result for the (single) initial state
			System.out.println(res.soln[init.nextSetBit(0)]);
		} catch (PrismException e) {
			System.out.println(e);
		}
	}
}