From 484cef6c43ef2cc9a456e2c824b39c80dfe9a191 Mon Sep 17 00:00:00 2001 From: Joachim Klein Date: Tue, 29 Aug 2017 13:14:45 +0200 Subject: [PATCH] explicit.MDPSparse: fix mvMultRewJacMinMaxSingle to properly handle zero-reward selfloops Now aligns with the default implementation in explicit.MDP: If there is a choice consisting of a self-loop, produces value 0 for zero-reward state/choice and infinite value for positive-reward state/choice (should be catched before in precomputations). The previous behaviour (self-loops have infinite value) messes with maximal total reward computations with Gauss-Seidel, e.g. prism functionality/verify/mdps/rewards/total-reward-2.nm functionality/verify/mdps/rewards/total-reward-2.nm.props -explicit -gs -test -prop 3 for the PRISM test suite fails. Also, align comments in MDP.mvMultRewJacMinMaxSingle. --- prism/src/explicit/MDP.java | 1 + prism/src/explicit/MDPSparse.java | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/prism/src/explicit/MDP.java b/prism/src/explicit/MDP.java index 74e1b8f1..613d0a20 100644 --- a/prism/src/explicit/MDP.java +++ b/prism/src/explicit/MDP.java @@ -676,6 +676,7 @@ public interface MDP extends MDPGeneric if (jac.onlySelfLoops) { if (d != 0) { + // always choosing the selfloop-action will produce infinite reward d = (d > 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY); } else { // no reward & only self-loops: d remains 0 diff --git a/prism/src/explicit/MDPSparse.java b/prism/src/explicit/MDPSparse.java index 2cea8df3..4180d0df 100644 --- a/prism/src/explicit/MDPSparse.java +++ b/prism/src/explicit/MDPSparse.java @@ -1016,6 +1016,7 @@ public class MDPSparse extends MDPExplicit h1 = rowStarts[s + 1]; for (j = l1; j < h1; j++) { diag = 1.0; + boolean onlySelfloops = true; // Compute sum for this distribution // (note: have to add state rewards in the loop for Jacobi) d = mdpRewards.getStateReward(s); @@ -1024,17 +1025,26 @@ public class MDPSparse extends MDPExplicit h2 = choiceStarts[j + 1]; for (k = l2; k < h2; k++) { if (cols[k] != s) { + onlySelfloops = false; d += nonZeros[k] * vect[cols[k]]; } else { diag -= nonZeros[k]; } } - if (diag > 0) - d /= diag; // Catch special case of probability 1 self-loop (Jacobi does it wrong) - if (h2 - l2 == 1 && cols[l2] == s) { - d = Double.POSITIVE_INFINITY; + if (onlySelfloops) { + if (d != 0) { + // always choosing the selfloop-action will produce infinite reward + d = (d>0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY); + } else { + // no reward & only self-loops: d remains 0 + d = 0; + } + } else if (diag > 0) { + // not only self-loops, do Jacobi division + d /= diag; } + // Check whether we have exceeded min/max so far if (first || (min && d < minmax) || (!min && d > minmax)) { minmax = d;