Added R[C<=k] operator for MDPs (sparse, explicit).

git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@7508 bbc10eb1-c90d-0410-af57-cb519fbb1720
12 years ago · edcac4a4a2
4 changed files with 179 additions and 1 deletions
--- a/prism/include/PrismSparse.h
+++ b/prism/include/PrismSparse.h
@ -111,6 +111,14 @@ JNIEXPORT jlong JNICALL Java_sparse_PrismSparse_PS_1NondetBoundedUntil
 JNIEXPORT jlong JNICALL Java_sparse_PrismSparse_PS_1NondetUntil
  (JNIEnv *, jclass, jlong, jlong, jobject, jlong, jlong, jint, jlong, jint, jlong, jint, jlong, jlong, jboolean, jlong);

+/*
+ * Class:     sparse_PrismSparse
+ * Method:    PS_NondetCumulReward
+ * Signature: (JJJJJIJIJIIZ)J
+ */
+JNIEXPORT jlong JNICALL Java_sparse_PrismSparse_PS_1NondetCumulReward
+  (JNIEnv *, jclass, jlong, jlong, jlong, jlong, jlong, jint, jlong, jint, jlong, jint, jint, jboolean);
+
 /*
 * Class:     sparse_PrismSparse
 * Method:    PS_NondetInstReward
--- a/prism/src/explicit/MDPModelChecker.java
+++ b/prism/src/explicit/MDPModelChecker.java
@ -36,6 +36,7 @@ import java.util.Vector;
 import parser.ast.Expression;
 import parser.ast.ExpressionTemporal;
 import parser.ast.ExpressionUnaryOp;
+import parser.type.TypeDouble;
 import parser.visitor.ASTTraverse;
 import prism.DRA;
 import prism.Pair;
@ -317,6 +318,9 @@ public class MDPModelChecker extends ProbModelChecker
 		if (expr instanceof ExpressionTemporal) {
 			ExpressionTemporal exprTemp = (ExpressionTemporal) expr;
 			switch (exprTemp.getOperator()) {
+			case ExpressionTemporal.R_C:
+				rewards = checkRewardCumul(model, modelRewards, exprTemp, min);
+				break;
 			case ExpressionTemporal.R_F:
 				rewards = checkRewardReach(model, modelRewards, exprTemp, min);
 				break;
@ -331,6 +335,43 @@ public class MDPModelChecker extends ProbModelChecker
 		return rewards;
 	}

+	/**
+	 * Compute rewards for a cumulative reward operator.
+	 */
+	protected StateValues checkRewardCumul(NondetModel model, MDPRewards modelRewards, ExpressionTemporal expr, boolean min) throws PrismException
+	{
+		int time; // time
+		StateValues rewards = null;
+		ModelCheckerResult res = null;
+
+		// check that there is an upper time bound
+		if (expr.getUpperBound() == null) {
+			throw new PrismException("Cumulative reward operator without time bound (C) is only allowed for multi-objective queries");
+		}
+
+		// get info from inst reward
+		time = expr.getUpperBound().evaluateInt(constantValues);
+		if (time < 0) {
+			throw new PrismException("Invalid time bound " + time + " in cumulative reward formula");
+		}
+
+		// a trivial case: "<=0"
+		if (time == 0) {
+			rewards = new StateValues(TypeDouble.getInstance(), model.getNumStates(), new Double(0));
+		} else {
+			// compute rewards
+			try {
+				res = computeCumulRewards((MDP) model, modelRewards, time, min);
+				rewards = StateValues.createFromDoubleArray(res.soln, model);
+				result.setStrategy(res.strat);
+			} catch (PrismException e) {
+				throw e;
+			}
+		}
+
+		return rewards;
+	}
+
 	/**
 	 * Compute rewards for a reachability reward operator.
 	 */
@ -1266,6 +1307,62 @@ public class MDPModelChecker extends ProbModelChecker
 		return res;
 	}

+	/**
+	 * Compute expected cumulative (step-bounded) rewards.
+	 * i.e. compute the min/max reward accumulated within {@code k} steps.
+	 * @param mdp The MDP
+	 * @param mdpRewards The rewards
+	 * @param target Target states
+	 * @param min Min or max rewards (true=min, false=max)
+	 */
+	public ModelCheckerResult computeCumulRewards(MDP mdp, MDPRewards mdpRewards, int k, boolean min) throws PrismException
+	{
+		ModelCheckerResult res = null;
+		int i, n, iters;
+		long timer;
+		double soln[], soln2[], tmpsoln[];
+
+		// Start expected cumulative reward
+		timer = System.currentTimeMillis();
+		mainLog.println("\nStarting expected cumulative reward (" + (min ? "min" : "max") + ")...");
+
+		// Store num states
+		n = mdp.getNumStates();
+
+		// Create/initialise solution vector(s)
+		soln = new double[n];
+		soln2 = new double[n];
+		for (i = 0; i < n; i++)
+			soln[i] = soln2[i] = 0.0;
+
+		// Start iterations
+		iters = 0;
+		while (iters < k) {
+			iters++;
+			// Matrix-vector multiply and min/max ops
+			int strat[] = new int[n];
+			mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, strat);
+			mainLog.println(strat);
+			// Swap vectors for next iter
+			tmpsoln = soln;
+			soln = soln2;
+			soln2 = tmpsoln;
+		}
+
+		// Finished value iteration
+		timer = System.currentTimeMillis() - timer;
+		mainLog.print("Expected cumulative reward (" + (min ? "min" : "max") + ")");
+		mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
+
+		// Return results
+		res = new ModelCheckerResult();
+		res.soln = soln;
+		res.numIters = iters;
+		res.timeTaken = timer / 1000.0;
+
+		return res;
+	}
+
 	/**
 	 * Compute expected reachability rewards.
 	 * @param mdp The MDP
--- a/prism/src/prism/NondetModelChecker.java
+++ b/prism/src/prism/NondetModelChecker.java
@ -298,7 +298,8 @@ public class NondetModelChecker extends NonProbModelChecker
 		if (expr2 instanceof ExpressionTemporal) {
 			switch (((ExpressionTemporal) expr2).getOperator()) {
 			case ExpressionTemporal.R_C:
-				throw new PrismException("Cumulative reward operator (C<=k) not yet supported for MDPs");
+				rewards = checkRewardCumul((ExpressionTemporal) expr2, stateRewards, transRewards, min);
+				break;
 			case ExpressionTemporal.R_I:
 				rewards = checkRewardInst((ExpressionTemporal) expr2, stateRewards, transRewards, min);
 				break;
@ -1590,6 +1591,39 @@ public class NondetModelChecker extends NonProbModelChecker
 		return probs;
 	}

+	// cumulative reward
+
+	protected StateValues checkRewardCumul(ExpressionTemporal expr, JDDNode stateRewards, JDDNode transRewards, boolean min) throws PrismException
+	{
+		int time; // time
+		StateValues rewards = null;
+
+		// check that there is an upper time bound
+		if (expr.getUpperBound() == null) {
+			throw new PrismException("Cumulative reward operator without time bound (C) is only allowed for multi-objective queries");
+		}
+
+		// get info from inst reward
+		time = expr.getUpperBound().evaluateInt(constantValues);
+		if (time < 0) {
+			throw new PrismException("Invalid time bound " + time + " in cumulative reward formula");
+		}
+
+		// a trivial case: "<=0"
+		if (time == 0) {
+			rewards = new StateValuesMTBDD(JDD.Constant(0), model);
+		} else {
+			// compute rewards
+			try {
+				rewards = computeCumulRewards(trans, stateRewards, transRewards, time, min);
+			} catch (PrismException e) {
+				throw e;
+			}
+		}
+
+		return rewards;
+	}
+
 	// inst reward

 	protected StateValues checkRewardInst(ExpressionTemporal expr, JDDNode stateRewards, JDDNode transRewards, boolean min) throws PrismException
@ -2162,6 +2196,36 @@ public class NondetModelChecker extends NonProbModelChecker
 		return value;
 	}

+	// compute cumulative rewards
+
+	protected StateValues computeCumulRewards(JDDNode tr, JDDNode sr, JDDNode trr, int time, boolean min) throws PrismException
+	{
+		DoubleVector rewardsDV;
+		StateValues rewards = null;
+
+		// compute rewards
+		mainLog.println("\nComputing rewards...");
+		mainLog.println("Engine: " + Prism.getEngineString(engine));
+		try {
+			switch (engine) {
+			case Prism.MTBDD:
+				throw new PrismException("MTBDD engine does not yet support this type of property (use the sparse engine instead)");
+			case Prism.SPARSE:
+				rewardsDV = PrismSparse.NondetCumulReward(tr, sr, trr, odd, allDDRowVars, allDDColVars, allDDNondetVars, time, min);
+				rewards = new StateValuesDV(rewardsDV, model);
+				break;
+			case Prism.HYBRID:
+				throw new PrismException("Hybrid engine does not yet support this type of property (use the sparse engine instead)");
+			default:
+				throw new PrismException("Unknown engine");
+			}
+		} catch (PrismException e) {
+			throw e;
+		}
+
+		return rewards;
+	}
+
 	// compute rewards for inst reward

 	protected StateValues computeInstRewards(JDDNode tr, JDDNode sr, int time, boolean min) throws PrismException
--- a/prism/src/sparse/PrismSparse.java
+++ b/prism/src/sparse/PrismSparse.java
@ -211,6 +211,15 @@ public class PrismSparse
 		return new DoubleVector(ptr, (int)(odd.getEOff() + odd.getTOff()));
 	}
 	
+	// pctl cumulative reward (probabilistic/mdp)
+	private static native long PS_NondetCumulReward(long trans, long sr, long trr, long odd, long rv, int nrv, long cv, int ncv, long ndv, int nndv, int bound, boolean minmax);
+	public static DoubleVector NondetCumulReward(JDDNode trans, JDDNode sr, JDDNode trr, ODDNode odd, JDDVars rows, JDDVars cols, JDDVars nondet, int bound, boolean minmax) throws PrismException
+	{
+		long ptr = PS_NondetCumulReward(trans.ptr(), sr.ptr(), trr.ptr(), odd.ptr(), rows.array(), rows.n(), cols.array(), cols.n(), nondet.array(), nondet.n(), bound, minmax);
+		if (ptr == 0) throw new PrismException(getErrorMessage());
+		return new DoubleVector(ptr, (int)(odd.getEOff() + odd.getTOff()));
+	}
+
 	// pctl inst reward (nondeterministic/mdp)
 	private static native long PS_NondetInstReward(long trans, long sr, long odd, long rv, int nrv, long cv, int ncv, long ndv, int nndv, int time, boolean minmax, long init);
 	public static DoubleVector NondetInstReward(JDDNode trans, JDDNode sr, ODDNode odd, JDDVars rows, JDDVars cols, JDDVars nondet, int time, boolean minmax, JDDNode init) throws PrismException