prism-accumulation/prism/src/explicit/MDP.java


								//==============================================================================

								//

								//	Copyright (c) 2002-

								//	Authors:

								//	* Dave Parker <david.parker@comlab.ox.ac.uk> (University of Oxford)

								//

								//------------------------------------------------------------------------------

								//

								//	This file is part of PRISM.

								//

								//	PRISM is free software; you can redistribute it and/or modify

								//	it under the terms of the GNU General Public License as published by

								//	the Free Software Foundation; either version 2 of the License, or

								//	(at your option) any later version.

								//

								//	PRISM is distributed in the hope that it will be useful,

								//	but WITHOUT ANY WARRANTY; without even the implied warranty of

								//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

								//	GNU General Public License for more details.

								//

								//	You should have received a copy of the GNU General Public License

								//	along with PRISM; if not, write to the Free Software Foundation,

								//	Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

								//

								//==============================================================================


								package explicit;


								import java.util.ArrayList;

								import java.util.BitSet;

								import java.util.Iterator;

								import java.util.List;

								import java.util.Map.Entry;

								import java.util.PrimitiveIterator;

								import java.util.PrimitiveIterator.OfInt;


								import common.IterableStateSet;

								import explicit.rewards.MCRewards;

								import explicit.rewards.MDPRewards;

								import prism.PrismUtils;


								/**

								 * Interface for classes that provide (read) access to an explicit-state MDP,

								 * where the transition probabilities are stored as double floating point values.

								 * <br>

								 * For the generic methods, e.g., the prob0 / prob1 precomputations that do not

								 * care about the concrete values, see {@link explicit.MDPGeneric}.

								 */

								public interface MDP extends MDPGeneric<Double>

								{

									/**

									 * Get an iterator over the transitions from choice {@code i} of state {@code s}.

									 */

									public Iterator<Entry<Integer, Double>> getTransitionsIterator(int s, int i);


									/**

									 * Functional interface for a consumer,

									 * accepting transitions (s,t,d), i.e.,

									 * from state s to state t with value d.

									 */

									@FunctionalInterface

									public interface TransitionConsumer {

										void accept(int s, int t, double d);

									}


									/**

									 * Iterate over the outgoing transitions of state {@code s} and choice {@code i}

									 * and call the accept method of the consumer for each of them:

									 * <br>

									 * Call {@code accept(s,t,d)} where t is the successor state d = P(s,i,t)

									 * is the probability from s to t with choice i.

									 * <p>

									 * <i>Default implementation</i>: The default implementation relies on iterating over the

									 * iterator returned by {@code getTransitionsIterator()}.

									 * <p><i>Note</i>: This method is the base for the default implementation of the numerical

									 * computation methods (mvMult, etc). In derived classes, it may thus be worthwhile to

									 * provide a specialised implementation for this method that avoids using the Iterator mechanism.

									 *

									 * @param s the state s

									 * @param i the choice i

									 * @param c the consumer

									 */

									public default void forEachTransition(int s, int i, TransitionConsumer c)

									{

										for (Iterator<Entry<Integer, Double>> it = getTransitionsIterator(s, i); it.hasNext(); ) {

											Entry<Integer, Double> e = it.next();

											c.accept(s, e.getKey(), e.getValue());

										}

									}


									/**

									 * Functional interface for a function

									 * mapping transitions (s,t,d), i.e.,

									 * from state s to state t with value d,

									 * to a double value.

									 */

									@FunctionalInterface

									public interface TransitionToDoubleFunction {

										double apply(int s, int t, double d);

									}


									/**

									 * Iterate over the outgoing transitions of state {@code s} and choice {@code i},

									 * call the function {@code f} and return the sum of the result values:

									 * <br>

									 * Return sum_t f(s, t, P(s,i,t)), where t ranges over the i-successors of s.

									 *

									 * @param s the state s

									 * @param c the consumer

									 */

									public default double sumOverTransitions(final int s, final int i, final TransitionToDoubleFunction f)

									{

										class Sum {

											double sum = 0.0;


											void accept(int s, int t, double d)

											{

												sum += f.apply(s, t, d);

											}

										}


										Sum sum = new Sum();

										forEachTransition(s, i, sum::accept);


										return sum.sum;

									}


									/**

									 * Do a matrix-vector multiplication followed by min/max, i.e. one step of value iteration,

									 * i.e. for all s: result[s] = min/max_k { sum_j P_k(s,j)*vect[j] }

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by

									 * @param min Min or max for (true=min, false=max)

									 * @param result Vector to store result in

									 * @param subset Only do multiplication for these rows (ignored if null)

									 * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default void mvMultMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int strat[])

									{

										mvMultMinMax(vect, min, result, new IterableStateSet(subset, getNumStates(), complement).iterator(), strat);

									}


									/**

									 * Do a matrix-vector multiplication followed by min/max, i.e. one step of value iteration,

									 * i.e. for all s: result[s] = min/max_k { sum_j P_k(s,j)*vect[j] }

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by

									 * @param min Min or max for (true=min, false=max)

									 * @param result Vector to store result in

									 * @param states Perform computation for these rows, in the iteration order

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default void mvMultMinMax(double vect[], boolean min, double result[], PrimitiveIterator.OfInt states, int strat[])

									{

										while (states.hasNext()) {

											final int s = states.nextInt();

											result[s] = mvMultMinMaxSingle(s, vect, min, strat);

										}

									}


									/**

									 * Do a single row of matrix-vector multiplication followed by min/max,

									 * i.e. return min/max_k { sum_j P_k(s,j)*vect[j] }

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param s Row index

									 * @param vect Vector to multiply by

									 * @param min Min or max for (true=min, false=max)

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default double mvMultMinMaxSingle(int s, double vect[], boolean min, int strat[])

									{

										int stratCh = -1;

										double minmax = 0;

										boolean first = true;


										for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {

											// Compute sum for this distribution

											double d = mvMultSingle(s, choice, vect);


											// Check whether we have exceeded min/max so far

											if (first || (min && d < minmax) || (!min && d > minmax)) {

												minmax = d;

												// If strategy generation is enabled, remember optimal choice

												if (strat != null)

													stratCh = choice;

											}

											first = false;

										}

										// If strategy generation is enabled, store optimal choice

										if (strat != null && !first) {

											// For max, only remember strictly better choices

											if (min) {

												strat[s] = stratCh;

											} else if (strat[s] == -1 || minmax > vect[s]) {

												strat[s] = stratCh;

											}

										}


										return minmax;

									}


									/**

									 * Determine which choices result in min/max after a single row of matrix-vector multiplication.

									 * @param s Row index

									 * @param vect Vector to multiply by

									 * @param min Min or max (true=min, false=max)

									 * @param val Min or max value to match

									 */

									public default List<Integer> mvMultMinMaxSingleChoices(int s, double vect[], boolean min, double val)

									{

										// Create data structures to store strategy

										final List<Integer> result = new ArrayList<Integer>();

										// One row of matrix-vector operation

										for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {

											// Compute sum for this distribution

											double d = mvMultSingle(s, choice, vect);


											// Store strategy info if value matches

											if (PrismUtils.doublesAreClose(val, d, 1e-12, false)) {

												result.add(choice);

											}

										}


										return result;

									}


									/**

									 * Do a single row of matrix-vector multiplication for a specific choice.

									 * @param s State (row) index

									 * @param i Choice index

									 * @param vect Vector to multiply by

									 */

									public default double mvMultSingle(int s, int i, double vect[])

									{

										return sumOverTransitions(s, i, (int __, int t, double prob) -> {

											return prob * vect[t];

										});

									}


									/**

									 * Do a Gauss-Seidel-style matrix-vector multiplication followed by min/max.

									 * i.e. for all s: vect[s] = min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }

									 * and store new values directly in {@code vect} as computed.

									 * The maximum (absolute/relative) difference between old/new

									 * elements of {@code vect} is also returned.

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by (and store the result in)

									 * @param min Min or max for (true=min, false=max)

									 * @param subset Only do multiplication for these rows (ignored if null)

									 * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)

									 * @param absolute If true, compute absolute, rather than relative, difference

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 * @return The maximum difference between old/new elements of {@code vect}

									 */

									public default double mvMultGSMinMax(double vect[], boolean min, BitSet subset, boolean complement, boolean absolute, int strat[])

									{

										return mvMultGSMinMax(vect, min, new IterableStateSet(subset, getNumStates(), complement).iterator(), absolute, strat);

									}


									/**

									 * Do a Gauss-Seidel-style matrix-vector multiplication followed by min/max.

									 * i.e. for all s: vect[s] = min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }

									 * and store new values directly in {@code vect} as computed.

									 * The maximum (absolute/relative) difference between old/new

									 * elements of {@code vect} is also returned.

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by (and store the result in)

									 * @param min Min or max for (true=min, false=max)

									 * @param states Perform computation for these rows, in the iteration order

									 * @param absolute If true, compute absolute, rather than relative, difference

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 * @return The maximum difference between old/new elements of {@code vect}

									 */

									public default double mvMultGSMinMax(double vect[], boolean min, PrimitiveIterator.OfInt states, boolean absolute, int strat[])

									{

										double d, diff, maxDiff = 0.0;

										while (states.hasNext()) {

											final int s = states.nextInt();

											d = mvMultJacMinMaxSingle(s, vect, min, strat);

											diff = absolute ? (Math.abs(d - vect[s])) : (Math.abs(d - vect[s]) / d);

											maxDiff = diff > maxDiff ? diff : maxDiff;

											vect[s] = d;

										}

										return maxDiff;

									}


									/**

									 * Do a Gauss-Seidel-style matrix-vector multiplication followed by min/max in the context of interval iteration.

									 * i.e. for all s: vect[s] = min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }

									 * and store new values directly in {@code vect} as computed.

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by (and store the result in)

									 * @param min Min or max for (true=min, false=max)

									 * @param subset Only do multiplication for these rows (ignored if null)

									 * @param states Perform computation for these rows, in the iteration order

									 * @param ensureMonotonic ensure monotonicity

									 * @param fromBelow iteration from below or from above? (for ensureMonotonicity)

									 */

									public default void mvMultGSMinMaxIntervalIter(double vect[], boolean min, PrimitiveIterator.OfInt states, int strat[], boolean ensureMonotonic, boolean fromBelow)

									{

										double d;

										while (states.hasNext()) {

											final int s = states.nextInt();

											d = mvMultJacMinMaxSingle(s, vect, min, strat);

											if (ensureMonotonic) {

												if (fromBelow) {

													// from below: do max old and new

													if (vect[s] > d) {

														d = vect[s];

													}

												} else {

													// from above: do min old and new

													if (vect[s] < d) {

														d = vect[s];

													}

												}

												vect[s] = d;

											} else {

												vect[s] = d;

											}

										}

									}


									/**

									 * Do a single row of Jacobi-style matrix-vector multiplication followed by min/max.

									 * i.e. return min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param s Row index

									 * @param vect Vector to multiply by

									 * @param min Min or max for (true=min, false=max)

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default double mvMultJacMinMaxSingle(int s, double vect[], boolean min, int strat[])

									{

										int stratCh = -1;

										double minmax = 0;

										boolean first = true;


										for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {

											double d = mvMultJacSingle(s, choice, vect);


											// Check whether we have exceeded min/max so far

											if (first || (min && d < minmax) || (!min && d > minmax)) {

												minmax = d;

												// If strategy generation is enabled, remember optimal choice

												if (strat != null) {

													stratCh = choice;

												}

											}

											first = false;

										}

										// If strategy generation is enabled, store optimal choice

										if (strat != null && !first) {

											// For max, only remember strictly better choices

											if (min) {

												strat[s] = stratCh;

											} else if (strat[s] == -1 || minmax > vect[s]) {

												strat[s] = stratCh;

											}

										}


										return minmax;


									}


									/**

									 * Do a single row of Jacobi-style matrix-vector multiplication for a specific choice.

									 * i.e. return min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }

									 * @param s Row index

									 * @param i Choice index

									 * @param vect Vector to multiply by

									 */

									public default double mvMultJacSingle(int s, int i, double vect[])

									{

										class Jacobi {

											double diag = 1.0;

											double d = 0.0;


											void accept(int s, int t, double prob) {

												if (t != s) {

													d += prob * vect[t];

												} else {

													diag -= prob;

												}

											}

										}


										Jacobi jac = new Jacobi();

										forEachTransition(s, i, jac::accept);


										double d = jac.d;

										double diag = jac.diag;

										if (diag > 0)

											d /= diag;


										return d;

									}


									/**

									 * Do a matrix-vector multiplication and sum of rewards followed by min/max, i.e. one step of value iteration.

									 * i.e. for all s: result[s] = min/max_k { rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j] }

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by

									 * @param mdpRewards The rewards

									 * @param min Min or max for (true=min, false=max)

									 * @param result Vector to store result in

									 * @param subset Only do multiplication for these rows (ignored if null)

									 * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int strat[])

									{

										for (OfInt it = new IterableStateSet(subset, getNumStates(), complement).iterator(); it.hasNext();) {

											final int s = it.nextInt();

											result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, strat);

										}

									}


									/**

									 * Do a matrix-vector multiplication and sum of rewards followed by min/max, i.e. one step of value iteration.

									 * i.e. for all s: result[s] = min/max_k { rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j] }

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by

									 * @param mdpRewards The rewards

									 * @param min Min or max for (true=min, false=max)

									 * @param result Vector to store result in

									 * @param states Perform computation for these rows, in the iteration order

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], PrimitiveIterator.OfInt states, int strat[])

									{

										while (states.hasNext()) {

											final int s = states.nextInt();

											result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, strat);

										}

									}


									/**

									 * Do a single row of matrix-vector multiplication and sum of rewards followed by min/max.

									 * i.e. return min/max_k { rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j] }

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param s Row index

									 * @param vect Vector to multiply by

									 * @param mdpRewards The rewards

									 * @param min Min or max for (true=min, false=max)

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])

									{

										int stratCh = -1;

										double minmax = 0;

										boolean first = true;


										for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {

											double d = mvMultRewSingle(s, choice, vect, mdpRewards);

											// Check whether we have exceeded min/max so far

											if (first || (min && d < minmax) || (!min && d > minmax)) {

												minmax = d;

												// If strategy generation is enabled, remember optimal choice

												if (strat != null)

													stratCh = choice;

											}

											first = false;

										}

										// If strategy generation is enabled, store optimal choice

										if (strat != null && !first) {

											// For max, only remember strictly better choices

											if (min) {

												strat[s] = stratCh;

											} else if (strat[s] == -1 || minmax > vect[s]) {

												strat[s] = stratCh;

											}

										}


										return minmax;

									}


									/**

									 * Do a single row of matrix-vector multiplication and sum of rewards for a specific choice.

									 * i.e. rew(s) + rew_i(s) + sum_j P_i(s,j)*vect[j]

									 * @param s State (row) index

									 * @param i Choice index

									 * @param vect Vector to multiply by

									 * @param mdpRewards The rewards (MDP rewards)

									 */

									public default double mvMultRewSingle(int s, int i, double vect[], MDPRewards mdpRewards)

									{

										double d = mdpRewards.getStateReward(s);

										d += mdpRewards.getTransitionReward(s, i);

										d += sumOverTransitions(s, i, (__, t, prob) -> {

											return prob * vect[t];

										});

										return d;

									}


									/**

									 * Do a single row of matrix-vector multiplication and sum of rewards for a specific choice.

									 * i.e. rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j]

									 * @param s State (row) index

									 * @param i Choice index

									 * @param vect Vector to multiply by

									 * @param mcRewards The rewards (DTMC rewards)

									 */

									public default double mvMultRewSingle(int s, int i, double vect[], MCRewards mcRewards)

									{

										double d = mcRewards.getStateReward(s);

										// TODO: add transition rewards when added to MCRewards

										d += sumOverTransitions(s, i, (__, t, prob) -> {

											return prob * vect[t];

										});

										return d;

									}


									/**

									 * Do a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max.

									 * i.e. for all s: vect[s] = min/max_k { rew(s) + rew_k(s) + (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }

									 * and store new values directly in {@code vect} as computed.

									 * The maximum (absolute/relative) difference between old/new

									 * elements of {@code vect} is also returned.

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by (and store the result in)

									 * @param mdpRewards The rewards

									 * @param min Min or max for (true=min, false=max)

									 * @param subset Only do multiplication for these rows (ignored if null)

									 * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)

									 * @param absolute If true, compute absolute, rather than relative, difference

									 * @return The maximum difference between old/new elements of {@code vect}

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default double mvMultRewGSMinMax(double vect[], MDPRewards mdpRewards, boolean min, BitSet subset, boolean complement, boolean absolute, int strat[])

									{

										return mvMultRewGSMinMax(vect, mdpRewards, min, new IterableStateSet(subset, getNumStates(), complement).iterator(), absolute, strat);

									}


									/**

									 * Do a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max.

									 * i.e. for all s: vect[s] = min/max_k { rew(s) + rew_k(s) + (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }

									 * and store new values directly in {@code vect} as computed.

									 * The maximum (absolute/relative) difference between old/new

									 * elements of {@code vect} is also returned.

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by (and store the result in)

									 * @param mdpRewards The rewards

									 * @param min Min or max for (true=min, false=max)

									 * @param states Perform computation for these rows, in the iteration order

									 * @param absolute If true, compute absolute, rather than relative, difference

									 * @return The maximum difference between old/new elements of {@code vect}

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default double mvMultRewGSMinMax(double vect[], MDPRewards mdpRewards, boolean min, PrimitiveIterator.OfInt states, boolean absolute, int strat[])

									{

										double d, diff, maxDiff = 0.0;

										while (states.hasNext()) {

											final int s = states.nextInt();

											d = mvMultRewJacMinMaxSingle(s, vect, mdpRewards, min, strat);

											diff = absolute ? (Math.abs(d - vect[s])) : (Math.abs(d - vect[s]) / d);

											maxDiff = diff > maxDiff ? diff : maxDiff;

											vect[s] = d;

										}

										return maxDiff;

									}


									/**

									 * Do a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max,

									 * for interval iteration.

									 * i.e. for all s: vect[s] = min/max_k { rew(s) + rew_k(s) + (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }

									 * and store new values directly in {@code vect} as computed.

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param vect Vector to multiply by (and store the result in)

									 * @param mdpRewards The rewards

									 * @param min Min or max for (true=min, false=max)

									 * @param states Perform computation for these rows, in the iteration order

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 * @param ensureMonotonic enforce monotonicity?

									 * @param fromBelow interval iteration from below? (for ensureMonotonic)

									 */

									public default void mvMultRewGSMinMaxIntervalIter(double vect[], MDPRewards mdpRewards, boolean min, PrimitiveIterator.OfInt states, int strat[], boolean ensureMonotonic, boolean fromBelow)

									{

										double d;

										while (states.hasNext()) {

											final int s = states.nextInt();

											d = mvMultRewJacMinMaxSingle(s, vect, mdpRewards, min, strat);

											if (ensureMonotonic) {

												if (fromBelow) {

													// from below: do max old and new

													if (vect[s] > d) {

														d = vect[s];

													}

												} else {

													// from above: do min old and new

													if (vect[s] < d) {

														d = vect[s];

													}

												}

												vect[s] = d;

											} else {

												vect[s] = d;

											}

										}

									}


									/**

									 * Do a single row of Jacobi-style matrix-vector multiplication and sum of rewards followed by min/max.

									 * i.e. return min/max_k { rew(s) + rew_k(s) + (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }

									 * Optionally, store optimal (memoryless) strategy info.

									 * @param s State (row) index

									 * @param vect Vector to multiply by

									 * @param mdpRewards The rewards

									 * @param min Min or max for (true=min, false=max)

									 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)

									 */

									public default double mvMultRewJacMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])

									{

										int stratCh = -1;

										double minmax = 0;

										boolean first = true;


										for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {

											double d = mvMultRewJacSingle(s, choice, vect, mdpRewards);

											// Check whether we have exceeded min/max so far

											if (first || (min && d < minmax) || (!min && d > minmax)) {

												minmax = d;

												// If strategy generation is enabled, remember optimal choice

												if (strat != null) {

													stratCh = choice;

												}

											}

											first = false;

										}

										// If strategy generation is enabled, store optimal choice

										if (strat != null && !first) {

											// For max, only remember strictly better choices

											if (min) {

												strat[s] = stratCh;

											} else if (strat[s] == -1 || minmax > vect[s]) {

												strat[s] = stratCh;

											}

										}


										return minmax;

									}


									/**

									 * Do a single row of Jacobi-style matrix-vector multiplication and sum of rewards,

									 * for a specific choice.

									 * i.e. return rew(s) + rew_i(s) + (sum_{j!=s} P_i(s,j)*vect[j]) / 1-P_i(s,s) }

									 * @param s State (row) index

									 * @param i the choice index

									 * @param vect Vector to multiply by

									 * @param mdpRewards The rewards

									 */

									public default double mvMultRewJacSingle(int s, int i, double vect[], MDPRewards mdpRewards)

									{

										class Jacobi {

											double diag = 1.0;

											double d = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, i);

											boolean onlySelfLoops = true;


											void accept(int s, int t, double prob) {

												if (t != s) {

													d += prob * vect[t];

													onlySelfLoops = false;

												} else {

													diag -= prob;

												}

											}

										}


										Jacobi jac = new Jacobi();

										forEachTransition(s, i, jac::accept);


										double d = jac.d;

										double diag = jac.diag;


										if (jac.onlySelfLoops) {

											if (d != 0) {

												// always choosing the selfloop-action will produce infinite reward

												d = (d > 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY);

											} else {

												// no reward & only self-loops: d remains 0

												d = 0;

											}

										} else {

											// not only self-loops, do Jacobi division

											if (diag > 0)

												d /= diag;

										}


										return d;

									}


									/**

									 * Determine which choices result in min/max after a single row of matrix-vector multiplication and sum of rewards.

									 * @param s State (row) index

									 * @param vect Vector to multiply by

									 * @param mdpRewards The rewards

									 * @param min Min or max (true=min, false=max)

									 * @param val Min or max value to match

									 */

									public default List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val)

									{

										// Create data structures to store strategy

										final List<Integer> result = new ArrayList<Integer>();


										// One row of matrix-vector operation

										for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {

											double d = mvMultRewSingle(s, choice, vect, mdpRewards);

											// Store strategy info if value matches

											if (PrismUtils.doublesAreClose(val, d, 1e-12, false)) {

												result.add(choice);

											}

										}


										return result;

									}


									/**

									 * Multiply the probability matrix induced by the MDP and {@code strat}

									 * to the right of {@code source}. Only those entries in {@code source}

									 * and only those columns in the probability matrix are considered, that

									 * are elements of {@code states}.

									 *

									 * The result of this multiplication is added to the contents of {@code dest}.

									 *

									 * @param states States for which to multiply

									 * @param strat (Memoryless) strategy to use

									 * @param source Vector to multiply matrix with

									 * @param dest Vector to write result to.

									 */

									public default void mvMultRight(int[] states, int[] strat, double[] source, double[] dest)

									{

										for (int state : states) {

											forEachTransition(state, strat[state], (int s, int t, double prob) -> {

												dest[t] += prob * source[s];

											});

										}

									}


								}