|
|
|
@ -1040,6 +1040,9 @@ public class MDPModelChecker extends ProbModelChecker |
|
|
|
case VALUE_ITERATION: |
|
|
|
res = computeReachRewardsValIter(mdp, mdpRewards, target, inf, min, init, known); |
|
|
|
break; |
|
|
|
case GAUSS_SEIDEL: |
|
|
|
res = computeReachRewardsGaussSeidel(mdp, mdpRewards, target, inf, min, init, known); |
|
|
|
break; |
|
|
|
default: |
|
|
|
throw new PrismException("Unknown MDP solution method " + solnMethod); |
|
|
|
} |
|
|
|
@ -1055,6 +1058,91 @@ public class MDPModelChecker extends ProbModelChecker |
|
|
|
return res; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Compute expected reachability rewards using Gauss-Seidel (including Jacobi-style updates). |
|
|
|
* @param mdp The MDP |
|
|
|
* @param mdpRewards The rewards |
|
|
|
* @param target Target states |
|
|
|
* @param inf States for which reward is infinite |
|
|
|
* @param min Min or max rewards (true=min, false=max) |
|
|
|
* @param init Optionally, an initial solution vector (will be overwritten) |
|
|
|
* @param known Optionally, a set of states for which the exact answer is known |
|
|
|
* Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values. |
|
|
|
*/ |
|
|
|
protected ModelCheckerResult computeReachRewardsGaussSeidel(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[], BitSet known) throws PrismException |
|
|
|
{ |
|
|
|
ModelCheckerResult res; |
|
|
|
BitSet unknown; |
|
|
|
int i, n, iters; |
|
|
|
double soln[], maxDiff; |
|
|
|
boolean done; |
|
|
|
long timer; |
|
|
|
|
|
|
|
// Start value iteration |
|
|
|
timer = System.currentTimeMillis(); |
|
|
|
mainLog.println("Starting Gauss-Seidel (" + (min ? "min" : "max") + ")..."); |
|
|
|
|
|
|
|
// Store num states |
|
|
|
n = mdp.getNumStates(); |
|
|
|
|
|
|
|
// Create solution vector(s) |
|
|
|
soln = (init == null) ? new double[n] : init; |
|
|
|
|
|
|
|
// Initialise solution vector. Use (where available) the following in order of preference: |
|
|
|
// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) passed in initial value; (4) 0.0 |
|
|
|
if (init != null) { |
|
|
|
if (known != null) { |
|
|
|
for (i = 0; i < n; i++) |
|
|
|
soln[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i]; |
|
|
|
} else { |
|
|
|
for (i = 0; i < n; i++) |
|
|
|
soln[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i]; |
|
|
|
} |
|
|
|
} else { |
|
|
|
for (i = 0; i < n; i++) |
|
|
|
soln[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0; |
|
|
|
} |
|
|
|
|
|
|
|
// Determine set of states actually need to compute values for |
|
|
|
unknown = new BitSet(); |
|
|
|
unknown.set(0, n); |
|
|
|
unknown.andNot(target); |
|
|
|
unknown.andNot(inf); |
|
|
|
if (known != null) |
|
|
|
unknown.andNot(known); |
|
|
|
|
|
|
|
// Start iterations |
|
|
|
iters = 0; |
|
|
|
done = false; |
|
|
|
while (!done && iters < maxIters) { |
|
|
|
//mainLog.println(soln); |
|
|
|
iters++; |
|
|
|
// Matrix-vector multiply and min/max ops |
|
|
|
maxDiff = mdp.mvMultRewGSMinMax(soln, mdpRewards, min, unknown, false, termCrit == TermCrit.ABSOLUTE); |
|
|
|
// Check termination |
|
|
|
done = maxDiff < termCritParam; |
|
|
|
} |
|
|
|
|
|
|
|
// Finished Gauss-Seidel |
|
|
|
timer = System.currentTimeMillis() - timer; |
|
|
|
mainLog.print("Gauss-Seidel (" + (min ? "min" : "max") + ")"); |
|
|
|
mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds."); |
|
|
|
|
|
|
|
// Non-convergence is an error |
|
|
|
if (!done) { |
|
|
|
String msg = "Iterative method did not converge within " + iters + " iterations."; |
|
|
|
msg += "\nConsider using a different numerical method or increasing the maximum number of iterations"; |
|
|
|
throw new PrismException(msg); |
|
|
|
} |
|
|
|
|
|
|
|
// Return results |
|
|
|
res = new ModelCheckerResult(); |
|
|
|
res.soln = soln; |
|
|
|
res.numIters = iters; |
|
|
|
res.timeTaken = timer / 1000.0; |
|
|
|
return res; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Compute expected reachability rewards using value iteration. |
|
|
|
* @param mdp The MDP |
|
|
|
|