From 484cef6c43ef2cc9a456e2c824b39c80dfe9a191 Mon Sep 17 00:00:00 2001
From: Joachim Klein <klein@tcs.inf.tu-dresden.de>
Date: Tue, 29 Aug 2017 13:14:45 +0200
Subject: [PATCH] explicit.MDPSparse: fix mvMultRewJacMinMaxSingle to properly
 handle zero-reward selfloops

Now aligns with the default implementation in explicit.MDP: If there is a choice consisting of a self-loop,
produces value 0 for zero-reward state/choice and infinite value for positive-reward state/choice
(should be catched before in precomputations).

The previous behaviour (self-loops have infinite value) messes with maximal total reward computations with
Gauss-Seidel, e.g.
prism functionality/verify/mdps/rewards/total-reward-2.nm functionality/verify/mdps/rewards/total-reward-2.nm.props -explicit -gs -test -prop 3
for the PRISM test suite fails.

Also, align comments in MDP.mvMultRewJacMinMaxSingle.
---
 prism/src/explicit/MDP.java       |  1 +
 prism/src/explicit/MDPSparse.java | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)
diff --git a/prism/src/explicit/MDP.java b/prism/src/explicit/MDP.java
index 74e1b8f1..613d0a20 100644
--- a/prism/src/explicit/MDP.java
+++ b/prism/src/explicit/MDP.java
@@ -676,6 +676,7 @@ public interface MDP extends MDPGeneric<Double>
 
 		if (jac.onlySelfLoops) {
 			if (d != 0) {
+				// always choosing the selfloop-action will produce infinite reward
 				d = (d > 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY);
 			} else {
 				// no reward & only self-loops: d remains 0
diff --git a/prism/src/explicit/MDPSparse.java b/prism/src/explicit/MDPSparse.java
index 2cea8df3..4180d0df 100644
--- a/prism/src/explicit/MDPSparse.java
+++ b/prism/src/explicit/MDPSparse.java
@@ -1016,6 +1016,7 @@ public class MDPSparse extends MDPExplicit
 		h1 = rowStarts[s + 1];
 		for (j = l1; j < h1; j++) {
 			diag = 1.0;
+			boolean onlySelfloops = true;
 			// Compute sum for this distribution
 			// (note: have to add state rewards in the loop for Jacobi)
 			d = mdpRewards.getStateReward(s);
@@ -1024,17 +1025,26 @@ public class MDPSparse extends MDPExplicit
 			h2 = choiceStarts[j + 1];
 			for (k = l2; k < h2; k++) {
 				if (cols[k] != s) {
+					onlySelfloops = false;
 					d += nonZeros[k] * vect[cols[k]];
 				} else {
 					diag -= nonZeros[k];
 				}
 			}
-			if (diag > 0)
-				d /= diag;
 			// Catch special case of probability 1 self-loop (Jacobi does it wrong)
-			if (h2 - l2 == 1 && cols[l2] == s) {
-				d = Double.POSITIVE_INFINITY;
+			if (onlySelfloops) {
+				if (d != 0) {
+					// always choosing the selfloop-action will produce infinite reward
+					d = (d>0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY);
+				} else {
+					// no reward & only self-loops: d remains 0
+					d = 0;
+				}
+			} else if (diag > 0) {
+				// not only self-loops, do Jacobi division
+				d /= diag;
 			}
+
 			// Check whether we have exceeded min/max so far
 			if (first || (min && d < minmax) || (!min && d > minmax)) {
 				minmax = d;