
Fix two bugs in the Gauss-Seidel solution of MDP expected reward to a target, for models with self-loops.

git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@11841 bbc10eb1-c90d-0410-af57-cb519fbb1720
master
Dave Parker, 9 years ago
parent commit ebd19aa9d6
2 changed files:
  1. prism/src/explicit/MDPSimple.java (12 lines changed)
  2. prism/src/explicit/MDPSparse.java (15 lines changed)
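Background on the fix: mvMultRewJacMinMaxSingle is the per-state update used by Gauss-Seidel (Jacobi-style) value iteration for expected reward to a target. For each choice j it should compute d = (stateReward(s) + transitionReward(s, j) + sum of prob * vect[k] over non-self-loop successors k) / (1 - selfLoopProb), and then take the min or max of d over the choices. Bug 1: the state reward was added to minmax only after the division by the diagonal, so it was never rescaled by 1 / (1 - selfLoopProb). Bug 2: a probability-1 self-loop makes the diagonal zero, the division is skipped, and a finite value is kept where the expected reward to the target is actually infinite. Below is a minimal standalone sketch of the corrected per-choice update, using plain arrays instead of PRISM's MDPRewards and Distribution classes (jacobiRewUpdate, succ, prob, stateRew, transRew and x are illustrative names, not PRISM's API):

    // Sketch only: corrected Jacobi-style update for one state s and one choice.
    // succ[i]/prob[i] enumerate the choice's transitions; x[] is the current value vector.
    static double jacobiRewUpdate(int s, int[] succ, double[] prob, double stateRew, double transRew, double[] x)
    {
        // Probability-1 self-loop: the target is never reached, so the expected reward is infinite
        if (succ.length == 1 && succ[0] == s)
            return Double.POSITIVE_INFINITY;
        double diag = 1.0;
        // State reward goes into the numerator, before dividing by the diagonal (bug 1)
        double d = stateRew + transRew;
        for (int i = 0; i < succ.length; i++) {
            if (succ[i] != s)
                d += prob[i] * x[succ[i]];
            else
                diag -= prob[i];
        }
        return diag > 0 ? d / diag : Double.POSITIVE_INFINITY;
    }

The caller then minimises (or maximises) this value over the state's choices, exactly as the two patched methods below do.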

prism/src/explicit/MDPSimple.java (12 lines changed)

@@ -949,7 +949,7 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
     @Override
     public double mvMultRewJacMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])
     {
-        int j, k, stratCh = -1;
+        int j, k = -1, stratCh = -1;
         double diag, d, prob, minmax;
         boolean first;
         List<Distribution> step;
@@ -962,7 +962,9 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
             j++;
             diag = 1.0;
             // Compute sum for this distribution
-            d = mdpRewards.getTransitionReward(s, j);
+            // (note: have to add state rewards in the loop for Jacobi)
+            d = mdpRewards.getStateReward(s);
+            d += mdpRewards.getTransitionReward(s, j);
             for (Map.Entry<Integer, Double> e : distr) {
                 k = (Integer) e.getKey();
                 prob = (Double) e.getValue();
@@ -974,6 +976,10 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
             }
             if (diag > 0)
                 d /= diag;
+            // Catch special case of probability 1 self-loop (Jacobi does it wrong)
+            if (distr.size() == 1 && k == s) {
+                d = Double.POSITIVE_INFINITY;
+            }
             // Check whether we have exceeded min/max so far
             if (first || (min && d < minmax) || (!min && d > minmax)) {
                 minmax = d;
@@ -984,8 +990,6 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
             }
             first = false;
         }
-        // Add state reward (doesn't affect min/max)
-        minmax += mdpRewards.getStateReward(s);
         // If strategy generation is enabled, store optimal choice
         if (strat != null & !first) {
             // For max, only remember strictly better choices
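As a hypothetical worked example of the MDPSimple change: take a state s with state reward 1 and a single choice that self-loops with probability 0.5 and reaches the target with probability 0.5 (target value 0). The correct value is (1 + 0.5 * 0) / (1 - 0.5) = 2, since s is visited twice on average before the target is reached. The old code divided only the transition part by the diagonal and added the state reward afterwards, giving 0 / 0.5 + 1 = 1; and for a probability-1 self-loop it returned a finite value instead of infinity.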

prism/src/explicit/MDPSparse.java (15 lines changed)

@@ -41,12 +41,11 @@ import java.util.Map.Entry;
 import java.util.TreeMap;
 import common.IterableStateSet;
+import explicit.rewards.MCRewards;
+import explicit.rewards.MDPRewards;
 import parser.State;
 import prism.PrismException;
 import prism.PrismUtils;
-import explicit.rewards.MCRewards;
-import explicit.rewards.MDPRewards;
 /**
  * Sparse matrix (non-mutable) explicit-state representation of an MDP.
@@ -1022,7 +1021,9 @@ public class MDPSparse extends MDPExplicit
         for (j = l1; j < h1; j++) {
             diag = 1.0;
             // Compute sum for this distribution
-            d = mdpRewards.getTransitionReward(s, j - l1);
+            // (note: have to add state rewards in the loop for Jacobi)
+            d = mdpRewards.getStateReward(s);
+            d += mdpRewards.getTransitionReward(s, j - l1);
             l2 = choiceStarts[j];
             h2 = choiceStarts[j + 1];
             for (k = l2; k < h2; k++) {
@@ -1034,6 +1035,10 @@ public class MDPSparse extends MDPExplicit
             }
             if (diag > 0)
                 d /= diag;
+            // Catch special case of probability 1 self-loop (Jacobi does it wrong)
+            if (h2 - l2 == 1 && cols[l2] == s) {
+                d = Double.POSITIVE_INFINITY;
+            }
             // Check whether we have exceeded min/max so far
             if (first || (min && d < minmax) || (!min && d > minmax)) {
                 minmax = d;
@@ -1043,8 +1048,6 @@ public class MDPSparse extends MDPExplicit
             }
             first = false;
         }
-        // Add state reward (doesn't affect min/max)
-        minmax += mdpRewards.getStateReward(s);
         // If strategy generation is enabled, store optimal choice
         if (strat != null & !first) {
             // For max, only remember strictly better choices
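The MDPSparse change mirrors the MDPSimple one over the sparse matrix arrays: a choice is a probability-1 self-loop exactly when it has a single transition entry (h2 - l2 == 1) whose column index cols[l2] is the state s itself, and the state reward is likewise moved into the per-choice sum before the division by the diagonal. The import changes only reorder existing imports.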
