From 0ee323ea6a3d31f120b13771654c608377791ac5 Mon Sep 17 00:00:00 2001 From: Joachim Klein Date: Thu, 16 Feb 2017 18:43:34 +0000 Subject: [PATCH] explicit.MDPModelChecker: implement instantaneous reward computation (Rmax/min [I=x]) git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@11979 bbc10eb1-c90d-0410-af57-cb519fbb1720 --- prism/src/explicit/MDPModelChecker.java | 56 ++++++++++++++++++++++++ prism/src/explicit/ProbModelChecker.java | 11 ++++- 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/prism/src/explicit/MDPModelChecker.java b/prism/src/explicit/MDPModelChecker.java index b2b1e183..4f2c16b5 100644 --- a/prism/src/explicit/MDPModelChecker.java +++ b/prism/src/explicit/MDPModelChecker.java @@ -1237,6 +1237,61 @@ public class MDPModelChecker extends ProbModelChecker return res; } + /** + * Compute expected instantaneous reward, + * i.e. compute the min/max expected reward of the states after {@code k} steps. + * @param mdp The MDP + * @param mdpRewards The rewards + * @param k the number of steps + * @param min Min or max rewards (true=min, false=max) + */ + public ModelCheckerResult computeInstantaneousRewards(MDP mdp, MDPRewards mdpRewards, final int k, boolean min) + { + ModelCheckerResult res = null; + int i, n, iters; + double soln[], soln2[], tmpsoln[]; + long timer; + + // Store num states + n = mdp.getNumStates(); + + // Start backwards transient computation + timer = System.currentTimeMillis(); + mainLog.println("\nStarting backwards instantaneous rewards computation..."); + + // Create solution vector(s) + soln = new double[n]; + soln2 = new double[n]; + + // Initialise solution vectors. + for (i = 0; i < n; i++) + soln[i] = mdpRewards.getStateReward(i); + + // Start iterations + for (iters = 0; iters < k; iters++) { + // Matrix-vector multiply + mdp.mvMultMinMax(soln, min, soln2, null, false, null); + // Swap vectors for next iter + tmpsoln = soln; + soln = soln2; + soln2 = tmpsoln; + } + + // Finished backwards transient computation + timer = System.currentTimeMillis() - timer; + mainLog.print("Backwards transient instantaneous rewards computation"); + mainLog.println(" took " + iters + " iters and " + timer / 1000.0 + " seconds."); + + // Return results + res = new ModelCheckerResult(); + res.soln = soln; + res.lastSoln = soln2; + res.numIters = iters; + res.timeTaken = timer / 1000.0; + res.timePre = 0.0; + return res; + } + /** * Compute expected reachability rewards. * @param mdp The MDP @@ -1776,4 +1831,5 @@ public class MDPModelChecker extends ProbModelChecker System.out.println(e); } } + } diff --git a/prism/src/explicit/ProbModelChecker.java b/prism/src/explicit/ProbModelChecker.java index 23026ac9..67eda68a 100644 --- a/prism/src/explicit/ProbModelChecker.java +++ b/prism/src/explicit/ProbModelChecker.java @@ -984,14 +984,21 @@ public class ProbModelChecker extends NonProbModelChecker // Compute/return the rewards ModelCheckerResult res = null; switch (model.getModelType()) { - case DTMC: + case DTMC: { int k = expr.getUpperBound().evaluateInt(constantValues); res = ((DTMCModelChecker) this).computeInstantaneousRewards((DTMC) model, (MCRewards) modelRewards, k, statesOfInterest); break; - case CTMC: + } + case CTMC: { double t = expr.getUpperBound().evaluateDouble(constantValues); res = ((CTMCModelChecker) this).computeInstantaneousRewards((CTMC) model, (MCRewards) modelRewards, t); break; + } + case MDP: { + int k = expr.getUpperBound().evaluateInt(constantValues); + res = ((MDPModelChecker) this).computeInstantaneousRewards((MDP) model, (MDPRewards) modelRewards, k, minMax.isMin()); + break; + } default: throw new PrismNotSupportedException("Explicit engine does not yet handle the " + expr.getOperatorSymbol() + " reward operator for " + model.getModelType() + "s");