From ebd19aa9d61ef9136dbf1274a7111e73d3925b84 Mon Sep 17 00:00:00 2001
From: Dave Parker
Date: Thu, 13 Oct 2016 00:42:46 +0000
Subject: [PATCH] Fix bug (two bugs, actually) in Gauss-Seidel solution of MDP
 expected reward to a target for models with self-loops.

git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@11841 bbc10eb1-c90d-0410-af57-cb519fbb1720
---
 prism/src/explicit/MDPSimple.java | 12 ++++++++----
 prism/src/explicit/MDPSparse.java | 15 +++++++++------
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/prism/src/explicit/MDPSimple.java b/prism/src/explicit/MDPSimple.java
index 875c9875..a80e48c3 100644
--- a/prism/src/explicit/MDPSimple.java
+++ b/prism/src/explicit/MDPSimple.java
@@ -949,7 +949,7 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
 	@Override
 	public double mvMultRewJacMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])
 	{
-		int j, k, stratCh = -1;
+		int j, k = -1, stratCh = -1;
 		double diag, d, prob, minmax;
 		boolean first;
 		List<Distribution> step;
@@ -962,7 +962,9 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
 			j++;
 			diag = 1.0;
 			// Compute sum for this distribution
-			d = mdpRewards.getTransitionReward(s, j);
+			// (note: have to add state rewards in the loop for Jacobi)
+			d = mdpRewards.getStateReward(s);
+			d += mdpRewards.getTransitionReward(s, j);
 			for (Map.Entry<Integer, Double> e : distr) {
 				k = (Integer) e.getKey();
 				prob = (Double) e.getValue();
@@ -974,6 +976,10 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
 			}
 			if (diag > 0)
 				d /= diag;
+			// Catch special case of probability 1 self-loop (Jacobi does it wrong)
+			if (distr.size() == 1 && k == s) {
+				d = Double.POSITIVE_INFINITY;
+			}
 			// Check whether we have exceeded min/max so far
 			if (first || (min && d < minmax) || (!min && d > minmax)) {
 				minmax = d;
@@ -984,8 +990,6 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
 				}
 				first = false;
 			}
-			// Add state reward (doesn't affect min/max)
-			minmax += mdpRewards.getStateReward(s);
 			// If strategy generation is enabled, store optimal choice
 			if (strat != null & !first) {
 				// For max, only remember strictly better choices
diff --git a/prism/src/explicit/MDPSparse.java b/prism/src/explicit/MDPSparse.java
index 278a3231..e2d12556 100644
--- a/prism/src/explicit/MDPSparse.java
+++ b/prism/src/explicit/MDPSparse.java
@@ -41,12 +41,11 @@ import java.util.Map.Entry;
 import java.util.TreeMap;
 
 import common.IterableStateSet;
-
+import explicit.rewards.MCRewards;
+import explicit.rewards.MDPRewards;
 import parser.State;
 import prism.PrismException;
 import prism.PrismUtils;
-import explicit.rewards.MCRewards;
-import explicit.rewards.MDPRewards;
 
 /**
  * Sparse matrix (non-mutable) explicit-state representation of an MDP.
@@ -1022,7 +1021,9 @@ public class MDPSparse extends MDPExplicit
 		for (j = l1; j < h1; j++) {
 			diag = 1.0;
 			// Compute sum for this distribution
-			d = mdpRewards.getTransitionReward(s, j - l1);
+			// (note: have to add state rewards in the loop for Jacobi)
+			d = mdpRewards.getStateReward(s);
+			d += mdpRewards.getTransitionReward(s, j - l1);
 			l2 = choiceStarts[j];
 			h2 = choiceStarts[j + 1];
 			for (k = l2; k < h2; k++) {
@@ -1034,6 +1035,10 @@ public class MDPSparse extends MDPExplicit
 			}
 			if (diag > 0)
 				d /= diag;
+			// Catch special case of probability 1 self-loop (Jacobi does it wrong)
+			if (h2 - l2 == 1 && cols[l2] == s) {
+				d = Double.POSITIVE_INFINITY;
+			}
 			// Check whether we have exceeded min/max so far
 			if (first || (min && d < minmax) || (!min && d > minmax)) {
 				minmax = d;
@@ -1043,8 +1048,6 @@ public class MDPSparse extends MDPExplicit
 				}
 				first = false;
 			}
-			// Add state reward (doesn't affect min/max)
-			minmax += mdpRewards.getStateReward(s);
 			// If strategy generation is enabled, store optimal choice
 			if (strat != null & !first) {
 				// For max, only remember strictly better choices
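
The changes in the two files are the same correction applied twice: the per-choice value d used in the Gauss-Seidel/Jacobi update of expected reward to a target was computed wrongly for states with self-loops. A minimal Java sketch of the corrected per-choice computation follows; the class, method and parameter names (JacobiRewardSketch, choiceValue, cols, probs, stateRew, transRew) are illustrative assumptions, not PRISM API.

	class JacobiRewardSketch
	{
		static double choiceValue(int s, double[] vect, int[] cols, double[] probs, double stateRew, double transRew)
		{
			double diag = 1.0;
			// Fix 1: add the state (and transition) reward before dividing by diag,
			// so it is rescaled by 1/(1 - P(s,s)) along with the rest of the sum
			double d = stateRew + transRew;
			for (int i = 0; i < cols.length; i++) {
				if (cols[i] != s)
					d += probs[i] * vect[cols[i]];
				else
					diag -= probs[i];
			}
			if (diag > 0)
				d /= diag;
			// Fix 2: a probability-1 self-loop gives diag == 0; the target is never
			// reached via this choice, so its expected reward is infinite
			if (cols.length == 1 && cols[0] == s)
				d = Double.POSITIVE_INFINITY;
			return d;
		}
	}

The surrounding min/max over choices and the strategy bookkeeping in mvMultRewJacMinMaxSingle are unchanged; the patch only moves the state reward into this per-choice value and adds the self-loop special case.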