From edcac4a4a29ce992d6597d88cb086cff167c481a Mon Sep 17 00:00:00 2001 From: Dave Parker Date: Mon, 7 Oct 2013 00:02:09 +0000 Subject: [PATCH] Added R[C<=k] operator for MDPs (sparse, explicit). git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@7508 bbc10eb1-c90d-0410-af57-cb519fbb1720 --- prism/include/PrismSparse.h | 8 ++ prism/src/explicit/MDPModelChecker.java | 97 +++++++++++++++++++++++++ prism/src/prism/NondetModelChecker.java | 66 ++++++++++++++++- prism/src/sparse/PrismSparse.java | 9 +++ 4 files changed, 179 insertions(+), 1 deletion(-) diff --git a/prism/include/PrismSparse.h b/prism/include/PrismSparse.h index 756b525d..60fadd75 100644 --- a/prism/include/PrismSparse.h +++ b/prism/include/PrismSparse.h @@ -111,6 +111,14 @@ JNIEXPORT jlong JNICALL Java_sparse_PrismSparse_PS_1NondetBoundedUntil JNIEXPORT jlong JNICALL Java_sparse_PrismSparse_PS_1NondetUntil (JNIEnv *, jclass, jlong, jlong, jobject, jlong, jlong, jint, jlong, jint, jlong, jint, jlong, jlong, jboolean, jlong); +/* + * Class: sparse_PrismSparse + * Method: PS_NondetCumulReward + * Signature: (JJJJJIJIJIIZ)J + */ +JNIEXPORT jlong JNICALL Java_sparse_PrismSparse_PS_1NondetCumulReward + (JNIEnv *, jclass, jlong, jlong, jlong, jlong, jlong, jint, jlong, jint, jlong, jint, jint, jboolean); + /* * Class: sparse_PrismSparse * Method: PS_NondetInstReward diff --git a/prism/src/explicit/MDPModelChecker.java b/prism/src/explicit/MDPModelChecker.java index c11d1a59..21571153 100644 --- a/prism/src/explicit/MDPModelChecker.java +++ b/prism/src/explicit/MDPModelChecker.java @@ -36,6 +36,7 @@ import java.util.Vector; import parser.ast.Expression; import parser.ast.ExpressionTemporal; import parser.ast.ExpressionUnaryOp; +import parser.type.TypeDouble; import parser.visitor.ASTTraverse; import prism.DRA; import prism.Pair; @@ -317,6 +318,9 @@ public class MDPModelChecker extends ProbModelChecker if (expr instanceof ExpressionTemporal) { ExpressionTemporal exprTemp = (ExpressionTemporal) expr; switch (exprTemp.getOperator()) { + case ExpressionTemporal.R_C: + rewards = checkRewardCumul(model, modelRewards, exprTemp, min); + break; case ExpressionTemporal.R_F: rewards = checkRewardReach(model, modelRewards, exprTemp, min); break; @@ -331,6 +335,43 @@ public class MDPModelChecker extends ProbModelChecker return rewards; } + /** + * Compute rewards for a cumulative reward operator. + */ + protected StateValues checkRewardCumul(NondetModel model, MDPRewards modelRewards, ExpressionTemporal expr, boolean min) throws PrismException + { + int time; // time + StateValues rewards = null; + ModelCheckerResult res = null; + + // check that there is an upper time bound + if (expr.getUpperBound() == null) { + throw new PrismException("Cumulative reward operator without time bound (C) is only allowed for multi-objective queries"); + } + + // get info from inst reward + time = expr.getUpperBound().evaluateInt(constantValues); + if (time < 0) { + throw new PrismException("Invalid time bound " + time + " in cumulative reward formula"); + } + + // a trivial case: "<=0" + if (time == 0) { + rewards = new StateValues(TypeDouble.getInstance(), model.getNumStates(), new Double(0)); + } else { + // compute rewards + try { + res = computeCumulRewards((MDP) model, modelRewards, time, min); + rewards = StateValues.createFromDoubleArray(res.soln, model); + result.setStrategy(res.strat); + } catch (PrismException e) { + throw e; + } + } + + return rewards; + } + /** * Compute rewards for a reachability reward operator. */ @@ -1266,6 +1307,62 @@ public class MDPModelChecker extends ProbModelChecker return res; } + /** + * Compute expected cumulative (step-bounded) rewards. + * i.e. compute the min/max reward accumulated within {@code k} steps. + * @param mdp The MDP + * @param mdpRewards The rewards + * @param target Target states + * @param min Min or max rewards (true=min, false=max) + */ + public ModelCheckerResult computeCumulRewards(MDP mdp, MDPRewards mdpRewards, int k, boolean min) throws PrismException + { + ModelCheckerResult res = null; + int i, n, iters; + long timer; + double soln[], soln2[], tmpsoln[]; + + // Start expected cumulative reward + timer = System.currentTimeMillis(); + mainLog.println("\nStarting expected cumulative reward (" + (min ? "min" : "max") + ")..."); + + // Store num states + n = mdp.getNumStates(); + + // Create/initialise solution vector(s) + soln = new double[n]; + soln2 = new double[n]; + for (i = 0; i < n; i++) + soln[i] = soln2[i] = 0.0; + + // Start iterations + iters = 0; + while (iters < k) { + iters++; + // Matrix-vector multiply and min/max ops + int strat[] = new int[n]; + mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, strat); + mainLog.println(strat); + // Swap vectors for next iter + tmpsoln = soln; + soln = soln2; + soln2 = tmpsoln; + } + + // Finished value iteration + timer = System.currentTimeMillis() - timer; + mainLog.print("Expected cumulative reward (" + (min ? "min" : "max") + ")"); + mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds."); + + // Return results + res = new ModelCheckerResult(); + res.soln = soln; + res.numIters = iters; + res.timeTaken = timer / 1000.0; + + return res; + } + /** * Compute expected reachability rewards. * @param mdp The MDP diff --git a/prism/src/prism/NondetModelChecker.java b/prism/src/prism/NondetModelChecker.java index ef1c535e..a38c9f8c 100644 --- a/prism/src/prism/NondetModelChecker.java +++ b/prism/src/prism/NondetModelChecker.java @@ -298,7 +298,8 @@ public class NondetModelChecker extends NonProbModelChecker if (expr2 instanceof ExpressionTemporal) { switch (((ExpressionTemporal) expr2).getOperator()) { case ExpressionTemporal.R_C: - throw new PrismException("Cumulative reward operator (C<=k) not yet supported for MDPs"); + rewards = checkRewardCumul((ExpressionTemporal) expr2, stateRewards, transRewards, min); + break; case ExpressionTemporal.R_I: rewards = checkRewardInst((ExpressionTemporal) expr2, stateRewards, transRewards, min); break; @@ -1590,6 +1591,39 @@ public class NondetModelChecker extends NonProbModelChecker return probs; } + // cumulative reward + + protected StateValues checkRewardCumul(ExpressionTemporal expr, JDDNode stateRewards, JDDNode transRewards, boolean min) throws PrismException + { + int time; // time + StateValues rewards = null; + + // check that there is an upper time bound + if (expr.getUpperBound() == null) { + throw new PrismException("Cumulative reward operator without time bound (C) is only allowed for multi-objective queries"); + } + + // get info from inst reward + time = expr.getUpperBound().evaluateInt(constantValues); + if (time < 0) { + throw new PrismException("Invalid time bound " + time + " in cumulative reward formula"); + } + + // a trivial case: "<=0" + if (time == 0) { + rewards = new StateValuesMTBDD(JDD.Constant(0), model); + } else { + // compute rewards + try { + rewards = computeCumulRewards(trans, stateRewards, transRewards, time, min); + } catch (PrismException e) { + throw e; + } + } + + return rewards; + } + // inst reward protected StateValues checkRewardInst(ExpressionTemporal expr, JDDNode stateRewards, JDDNode transRewards, boolean min) throws PrismException @@ -2162,6 +2196,36 @@ public class NondetModelChecker extends NonProbModelChecker return value; } + // compute cumulative rewards + + protected StateValues computeCumulRewards(JDDNode tr, JDDNode sr, JDDNode trr, int time, boolean min) throws PrismException + { + DoubleVector rewardsDV; + StateValues rewards = null; + + // compute rewards + mainLog.println("\nComputing rewards..."); + mainLog.println("Engine: " + Prism.getEngineString(engine)); + try { + switch (engine) { + case Prism.MTBDD: + throw new PrismException("MTBDD engine does not yet support this type of property (use the sparse engine instead)"); + case Prism.SPARSE: + rewardsDV = PrismSparse.NondetCumulReward(tr, sr, trr, odd, allDDRowVars, allDDColVars, allDDNondetVars, time, min); + rewards = new StateValuesDV(rewardsDV, model); + break; + case Prism.HYBRID: + throw new PrismException("Hybrid engine does not yet support this type of property (use the sparse engine instead)"); + default: + throw new PrismException("Unknown engine"); + } + } catch (PrismException e) { + throw e; + } + + return rewards; + } + // compute rewards for inst reward protected StateValues computeInstRewards(JDDNode tr, JDDNode sr, int time, boolean min) throws PrismException diff --git a/prism/src/sparse/PrismSparse.java b/prism/src/sparse/PrismSparse.java index d25f7ca7..7d8f9c02 100644 --- a/prism/src/sparse/PrismSparse.java +++ b/prism/src/sparse/PrismSparse.java @@ -211,6 +211,15 @@ public class PrismSparse return new DoubleVector(ptr, (int)(odd.getEOff() + odd.getTOff())); } + // pctl cumulative reward (probabilistic/mdp) + private static native long PS_NondetCumulReward(long trans, long sr, long trr, long odd, long rv, int nrv, long cv, int ncv, long ndv, int nndv, int bound, boolean minmax); + public static DoubleVector NondetCumulReward(JDDNode trans, JDDNode sr, JDDNode trr, ODDNode odd, JDDVars rows, JDDVars cols, JDDVars nondet, int bound, boolean minmax) throws PrismException + { + long ptr = PS_NondetCumulReward(trans.ptr(), sr.ptr(), trr.ptr(), odd.ptr(), rows.array(), rows.n(), cols.array(), cols.n(), nondet.array(), nondet.n(), bound, minmax); + if (ptr == 0) throw new PrismException(getErrorMessage()); + return new DoubleVector(ptr, (int)(odd.getEOff() + odd.getTOff())); + } + // pctl inst reward (nondeterministic/mdp) private static native long PS_NondetInstReward(long trans, long sr, long odd, long rv, int nrv, long cv, int ncv, long ndv, int nndv, int time, boolean minmax, long init); public static DoubleVector NondetInstReward(JDDNode trans, JDDNode sr, ODDNode odd, JDDVars rows, JDDVars cols, JDDVars nondet, int time, boolean minmax, JDDNode init) throws PrismException