From ebd19aa9d61ef9136dbf1274a7111e73d3925b84 Mon Sep 17 00:00:00 2001
From: Dave Parker
Date: Thu, 13 Oct 2016 00:42:46 +0000
Subject: [PATCH] Fix bug (two bugs, actually) in Gauss-Seidel solution of MDP
 expected reward to a target for models with self-loops.

git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@11841 bbc10eb1-c90d-0410-af57-cb519fbb1720
---
 prism/src/explicit/MDPSimple.java | 12 ++++++++----
 prism/src/explicit/MDPSparse.java | 15 +++++++++------
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/prism/src/explicit/MDPSimple.java b/prism/src/explicit/MDPSimple.java
index 875c9875..a80e48c3 100644
--- a/prism/src/explicit/MDPSimple.java
+++ b/prism/src/explicit/MDPSimple.java
@@ -949,7 +949,7 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
 	@Override
 	public double mvMultRewJacMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])
 	{
-		int j, k, stratCh = -1;
+		int j, k = -1, stratCh = -1;
 		double diag, d, prob, minmax;
 		boolean first;
 		List<Distribution> step;
@@ -962,7 +962,9 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
 			j++;
 			diag = 1.0;
 			// Compute sum for this distribution
-			d = mdpRewards.getTransitionReward(s, j);
+			// (note: have to add state rewards in the loop for Jacobi)
+			d = mdpRewards.getStateReward(s);
+			d += mdpRewards.getTransitionReward(s, j);
 			for (Map.Entry<Integer, Double> e : distr) {
 				k = (Integer) e.getKey();
 				prob = (Double) e.getValue();
@@ -974,6 +976,10 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
 			}
 			if (diag > 0)
 				d /= diag;
+			// Catch special case of probability 1 self-loop (Jacobi does it wrong)
+			if (distr.size() == 1 && k == s) {
+				d = Double.POSITIVE_INFINITY;
+			}
 			// Check whether we have exceeded min/max so far
 			if (first || (min && d < minmax) || (!min && d > minmax)) {
 				minmax = d;
@@ -984,8 +990,6 @@ public class MDPSimple extends MDPExplicit implements NondetModelSimple
 				}
 				first = false;
 			}
-			// Add state reward (doesn't affect min/max)
-			minmax += mdpRewards.getStateReward(s);
 			// If strategy generation is enabled, store optimal choice
 			if (strat != null & !first) {
 				// For max, only remember strictly better choices
diff --git a/prism/src/explicit/MDPSparse.java b/prism/src/explicit/MDPSparse.java
index 278a3231..e2d12556 100644
--- a/prism/src/explicit/MDPSparse.java
+++ b/prism/src/explicit/MDPSparse.java
@@ -41,12 +41,11 @@ import java.util.Map.Entry;
 import java.util.TreeMap;
 
 import common.IterableStateSet;
-
+import explicit.rewards.MCRewards;
+import explicit.rewards.MDPRewards;
 import parser.State;
 import prism.PrismException;
 import prism.PrismUtils;
-import explicit.rewards.MCRewards;
-import explicit.rewards.MDPRewards;
 
 /**
  * Sparse matrix (non-mutable) explicit-state representation of an MDP.
@@ -1022,7 +1021,9 @@ public class MDPSparse extends MDPExplicit
 		for (j = l1; j < h1; j++) {
 			diag = 1.0;
 			// Compute sum for this distribution
-			d = mdpRewards.getTransitionReward(s, j - l1);
+			// (note: have to add state rewards in the loop for Jacobi)
+			d = mdpRewards.getStateReward(s);
+			d += mdpRewards.getTransitionReward(s, j - l1);
 			l2 = choiceStarts[j];
 			h2 = choiceStarts[j + 1];
 			for (k = l2; k < h2; k++) {
@@ -1034,6 +1035,10 @@ public class MDPSparse extends MDPExplicit
 			}
 			if (diag > 0)
 				d /= diag;
+			// Catch special case of probability 1 self-loop (Jacobi does it wrong)
+			if (h2 - l2 == 1 && cols[l2] == s) {
+				d = Double.POSITIVE_INFINITY;
+			}
 			// Check whether we have exceeded min/max so far
 			if (first || (min && d < minmax) || (!min && d > minmax)) {
 				minmax = d;
@@ -1043,8 +1048,6 @@ public class MDPSparse extends MDPExplicit
 				}
 				first = false;
 			}
-			// Add state reward (doesn't affect min/max)
-			minmax += mdpRewards.getStateReward(s);
 			// If strategy generation is enabled, store optimal choice
 			if (strat != null & !first) {
 				// For max, only remember strictly better choices
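
The changes in the two files are the same correction applied twice: the per-choice value d used in the Gauss-Seidel/Jacobi update of expected reward to a target was computed wrongly for states with self-loops. A minimal Java sketch of the corrected per-choice computation follows; the class, method and parameter names (JacobiRewardSketch, choiceValue, cols, probs, stateRew, transRew) are illustrative assumptions, not PRISM API.

	class JacobiRewardSketch
	{
		static double choiceValue(int s, double[] vect, int[] cols, double[] probs, double stateRew, double transRew)
		{
			double diag = 1.0;
			// Fix 1: add the state (and transition) reward before dividing by diag,
			// so it is rescaled by 1/(1 - P(s,s)) along with the rest of the sum
			double d = stateRew + transRew;
			for (int i = 0; i < cols.length; i++) {
				if (cols[i] != s)
					d += probs[i] * vect[cols[i]];
				else
					diag -= probs[i];
			}
			if (diag > 0)
				d /= diag;
			// Fix 2: a probability-1 self-loop gives diag == 0; the target is never
			// reached via this choice, so its expected reward is infinite
			if (cols.length == 1 && cols[0] == s)
				d = Double.POSITIVE_INFINITY;
			return d;
		}
	}

The surrounding min/max over choices and the strategy bookkeeping in mvMultRewJacMinMaxSingle are unchanged; the patch only moves the state reward into this per-choice value and adds the self-loop special case.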