You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

741 lines
26 KiB

//==============================================================================
//
// Copyright (c) 2002-
// Authors:
// * Dave Parker <david.parker@comlab.ox.ac.uk> (University of Oxford)
//
//------------------------------------------------------------------------------
//
// This file is part of PRISM.
//
// PRISM is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// PRISM is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with PRISM; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//==============================================================================
package explicit;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import java.util.PrimitiveIterator;
import java.util.PrimitiveIterator.OfInt;
import common.IterableStateSet;
import explicit.rewards.MCRewards;
import explicit.rewards.MDPRewards;
import prism.PrismUtils;
/**
* Interface for classes that provide (read) access to an explicit-state MDP,
* where the transition probabilities are stored as double floating point values.
* <br>
* For the generic methods, e.g., the prob0 / prob1 precomputations that do not
* care about the concrete values, see {@link explicit.MDPGeneric}.
*/
public interface MDP extends MDPGeneric<Double>
{
/**
 * Get an iterator over the transitions from choice {@code i} of state {@code s}.
 * <p>
 * Each entry pairs a successor state index (the key) with the value attached
 * to the transition into that successor (the value); this is how the default
 * implementation of {@code forEachTransition} interprets the entries.
 *
 * @param s the index of the source state
 * @param i the index of the choice within state {@code s}
 * @return an iterator over (successor state, transition value) entries
 */
public Iterator<Entry<Integer, Double>> getTransitionsIterator(int s, int i);
/**
 * Functional interface for a consumer,
 * accepting transitions (s,t,d), i.e.,
 * from state s to state t with value d.
 * <p>
 * The callback returns nothing; implementations communicate results
 * through side effects (e.g. by updating captured state).
 */
@FunctionalInterface
public interface TransitionConsumer {
/**
 * Handle a single transition.
 *
 * @param s the source state
 * @param t the target (successor) state
 * @param d the value attached to the transition from s to t
 */
void accept(int s, int t, double d);
}
/**
 * Visit every outgoing transition of choice {@code i} in state {@code s}
 * and hand it to the consumer {@code c}.
 * <br>
 * For each transition, {@code c.accept(s, t, d)} is invoked, where t is the
 * successor state and d = P(s,i,t) is the probability of moving from s to t
 * under choice i.
 * <p>
 * <i>Default implementation</i>: walks the iterator obtained from
 * {@code getTransitionsIterator(s, i)}, in the order that iterator yields entries.
 * <p><i>Note</i>: The default implementations of the numerical computation
 * methods (mvMult, etc.) are built on top of this method. Derived classes
 * may therefore benefit from overriding it with a specialised version that
 * bypasses the Iterator mechanism.
 *
 * @param s the state s
 * @param i the choice i
 * @param c the consumer
 */
public default void forEachTransition(int s, int i, TransitionConsumer c)
{
	final Iterator<Entry<Integer, Double>> transitions = getTransitionsIterator(s, i);
	while (transitions.hasNext()) {
		final Entry<Integer, Double> transition = transitions.next();
		// key = successor state, value = transition probability
		c.accept(s, transition.getKey(), transition.getValue());
	}
}
/**
 * Functional interface for a function
 * mapping transitions (s,t,d), i.e.,
 * from state s to state t with value d,
 * to a double value.
 */
@FunctionalInterface
public interface TransitionToDoubleFunction {
/**
 * Compute the value associated with a single transition.
 *
 * @param s the source state
 * @param t the target (successor) state
 * @param d the value attached to the transition from s to t
 * @return the double value this function assigns to the transition
 */
double apply(int s, int t, double d);
}
/**
 * Apply the function {@code f} to every outgoing transition of choice {@code i}
 * in state {@code s} and add up the results:
 * <br>
 * Return sum_t f(s, t, P(s,i,t)), where t ranges over the i-successors of s.
 * <p>
 * The transitions are visited via {@code forEachTransition}, and the terms are
 * accumulated in visiting order, starting from 0.0.
 *
 * @param s the state s
 * @param i the choice i
 * @param f the function evaluated on each transition
 * @return the sum of the function values (0.0 if there are no transitions)
 */
public default double sumOverTransitions(final int s, final int i, final TransitionToDoubleFunction f)
{
	// single-cell accumulator, so that the lambda below can update it
	final double[] total = { 0.0 };
	forEachTransition(s, i, (from, to, value) -> {
		total[0] += f.apply(from, to, value);
	});
	return total[0];
}
/**
 * Perform one step of value iteration: a matrix-vector multiplication followed by min/max,
 * i.e. for all s: result[s] = min/max_k { sum_j P_k(s,j)*vect[j] }
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * The rows to process are described by {@code subset}/{@code complement}; the actual work
 * is delegated to the overload taking an iterator over states.
 *
 * @param vect Vector to multiply by
 * @param min Min or max for (true=min, false=max)
 * @param result Vector to store result in
 * @param subset Only do multiplication for these rows (ignored if null)
 * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 */
public default void mvMultMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int strat[])
{
	final PrimitiveIterator.OfInt rows = new IterableStateSet(subset, getNumStates(), complement).iterator();
	mvMultMinMax(vect, min, result, rows, strat);
}
/**
 * Perform one step of value iteration: a matrix-vector multiplication followed by min/max,
 * i.e. for all s: result[s] = min/max_k { sum_j P_k(s,j)*vect[j] }
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * Each row is computed by {@code mvMultMinMaxSingle}; the iterator is consumed completely.
 *
 * @param vect Vector to multiply by
 * @param min Min or max for (true=min, false=max)
 * @param result Vector to store result in
 * @param states Perform computation for these rows, in the iteration order
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 */
public default void mvMultMinMax(double vect[], boolean min, double result[], PrimitiveIterator.OfInt states, int strat[])
{
	for (; states.hasNext();) {
		final int row = states.nextInt();
		final double value = mvMultMinMaxSingle(row, vect, min, strat);
		result[row] = value;
	}
}
/**
 * Compute one row of a matrix-vector multiplication followed by min/max,
 * i.e. return min/max_k { sum_j P_k(s,j)*vect[j] }
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * Ties between choices are resolved in favour of the lowest choice index.
 * If state {@code s} has no choices, 0 is returned and {@code strat} is left untouched.
 * When maximising, an existing entry {@code strat[s]} is only replaced if it is -1
 * or if the new value is strictly greater than {@code vect[s]}.
 *
 * @param s Row index
 * @param vect Vector to multiply by
 * @param min Min or max for (true=min, false=max)
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @return the min/max value over all choices of state {@code s}
 */
public default double mvMultMinMaxSingle(int s, double vect[], boolean min, int strat[])
{
	final int numChoices = getNumChoices(s);
	if (numChoices <= 0) {
		// nothing to optimise over
		return 0.0;
	}
	// Start from the first choice, then look for strictly better ones
	double best = mvMultSingle(s, 0, vect);
	int bestChoice = 0;
	for (int choice = 1; choice < numChoices; choice++) {
		final double value = mvMultSingle(s, choice, vect);
		final boolean improves = min ? (value < best) : (value > best);
		if (improves) {
			best = value;
			bestChoice = choice;
		}
	}
	// If strategy generation is enabled, store optimal choice
	// (for max, only remember strictly better choices)
	if (strat != null) {
		if (min || strat[s] == -1 || best > vect[s]) {
			strat[s] = bestChoice;
		}
	}
	return best;
}
/**
 * Collect the choices of state {@code s} whose value, after a single row of
 * matrix-vector multiplication, matches {@code val}.
 * <p>
 * Matching is decided by {@code PrismUtils.doublesAreClose(val, d, 1e-12, false)}
 * (NOTE(review): the final {@code false} presumably selects a relative comparison;
 * confirm against PrismUtils). The {@code min} flag is not consulted here.
 *
 * @param s Row index
 * @param vect Vector to multiply by
 * @param min Min or max (true=min, false=max)
 * @param val Min or max value to match
 * @return the indices of the matching choices, in increasing order
 */
public default List<Integer> mvMultMinMaxSingleChoices(int s, double vect[], boolean min, double val)
{
	final List<Integer> matching = new ArrayList<>();
	final int numChoices = getNumChoices(s);
	int choice = 0;
	while (choice < numChoices) {
		// Value of this choice's distribution
		final double value = mvMultSingle(s, choice, vect);
		if (PrismUtils.doublesAreClose(val, value, 1e-12, false)) {
			matching.add(choice);
		}
		choice++;
	}
	return matching;
}
/**
 * Compute a single row of matrix-vector multiplication for a specific choice,
 * i.e. return sum_j P_i(s,j)*vect[j].
 *
 * @param s State (row) index
 * @param i Choice index
 * @param vect Vector to multiply by
 * @return the weighted sum of {@code vect} over the successors of choice {@code i}
 */
public default double mvMultSingle(int s, int i, double vect[])
{
	// the source state is irrelevant for the individual terms
	return sumOverTransitions(s, i, (from, to, prob) -> prob * vect[to]);
}
/**
 * Perform a Gauss-Seidel-style matrix-vector multiplication followed by min/max,
 * i.e. for all s: vect[s] = min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / (1-P_k(s,s)) }
 * with new values written straight into {@code vect} as they are computed.
 * The maximum (absolute/relative) difference between old/new
 * elements of {@code vect} is also returned.
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * The rows to process are described by {@code subset}/{@code complement}; the actual work
 * is delegated to the overload taking an iterator over states.
 *
 * @param vect Vector to multiply by (and store the result in)
 * @param min Min or max for (true=min, false=max)
 * @param subset Only do multiplication for these rows (ignored if null)
 * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
 * @param absolute If true, compute absolute, rather than relative, difference
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @return The maximum difference between old/new elements of {@code vect}
 */
public default double mvMultGSMinMax(double vect[], boolean min, BitSet subset, boolean complement, boolean absolute, int strat[])
{
	final PrimitiveIterator.OfInt rows = new IterableStateSet(subset, getNumStates(), complement).iterator();
	return mvMultGSMinMax(vect, min, rows, absolute, strat);
}
/**
 * Do a Gauss-Seidel-style matrix-vector multiplication followed by min/max,
 * i.e. for all s: vect[s] = min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / (1-P_k(s,s)) }
 * and store new values directly in {@code vect} as computed.
 * The maximum (absolute/relative) difference between old/new
 * elements of {@code vect} is also returned.
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * The relative difference is {@code |new - old| / |new|}. Dividing by the magnitude
 * of the new value keeps the difference non-negative even when the new value is
 * negative, so such rows are not silently dropped from the maximum.
 *
 * @param vect Vector to multiply by (and store the result in)
 * @param min Min or max for (true=min, false=max)
 * @param states Perform computation for these rows, in the iteration order
 * @param absolute If true, compute absolute, rather than relative, difference
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @return The maximum difference between old/new elements of {@code vect}
 */
public default double mvMultGSMinMax(double vect[], boolean min, PrimitiveIterator.OfInt states, boolean absolute, int strat[])
{
	double maxDiff = 0.0;
	while (states.hasNext()) {
		final int s = states.nextInt();
		final double d = mvMultJacMinMaxSingle(s, vect, min, strat);
		double diff = Math.abs(d - vect[s]);
		if (!absolute) {
			// relative to the magnitude (not the signed value) of the new entry
			diff /= Math.abs(d);
		}
		// a NaN difference (e.g. 0/0) never compares greater and is thus skipped
		if (diff > maxDiff) {
			maxDiff = diff;
		}
		// Gauss-Seidel: make the new value visible to subsequent rows immediately
		vect[s] = d;
	}
	return maxDiff;
}
/**
 * Perform a Gauss-Seidel-style matrix-vector multiplication followed by min/max in the context of interval iteration,
 * i.e. for all s: vect[s] = min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / (1-P_k(s,s)) }
 * with new values written straight into {@code vect} as they are computed.
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * With {@code ensureMonotonic}, a row never moves in the wrong direction: when iterating
 * from below the larger of old and new value is kept, when iterating from above the smaller.
 *
 * @param vect Vector to multiply by (and store the result in)
 * @param min Min or max for (true=min, false=max)
 * @param states Perform computation for these rows, in the iteration order
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @param ensureMonotonic ensure monotonicity
 * @param fromBelow iteration from below or from above? (only relevant with {@code ensureMonotonic})
 */
public default void mvMultGSMinMaxIntervalIter(double vect[], boolean min, PrimitiveIterator.OfInt states, int strat[], boolean ensureMonotonic, boolean fromBelow)
{
	while (states.hasNext()) {
		final int row = states.nextInt();
		final double updated = mvMultJacMinMaxSingle(row, vect, min, strat);
		final double previous = vect[row];
		if (!ensureMonotonic) {
			vect[row] = updated;
		} else if (fromBelow) {
			// from below: keep the maximum of old and new
			vect[row] = (previous > updated) ? previous : updated;
		} else {
			// from above: keep the minimum of old and new
			vect[row] = (previous < updated) ? previous : updated;
		}
	}
}
/**
 * Compute one row of a Jacobi-style matrix-vector multiplication followed by min/max,
 * i.e. return min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / (1-P_k(s,s)) }
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * Ties between choices are resolved in favour of the lowest choice index.
 * If state {@code s} has no choices, 0 is returned and {@code strat} is left untouched.
 * When maximising, an existing entry {@code strat[s]} is only replaced if it is -1
 * or if the new value is strictly greater than {@code vect[s]}.
 *
 * @param s Row index
 * @param vect Vector to multiply by
 * @param min Min or max for (true=min, false=max)
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @return the min/max value over all choices of state {@code s}
 */
public default double mvMultJacMinMaxSingle(int s, double vect[], boolean min, int strat[])
{
	final int numChoices = getNumChoices(s);
	if (numChoices <= 0) {
		// nothing to optimise over
		return 0.0;
	}
	// Start from the first choice, then look for strictly better ones
	double best = mvMultJacSingle(s, 0, vect);
	int bestChoice = 0;
	for (int choice = 1; choice < numChoices; choice++) {
		final double value = mvMultJacSingle(s, choice, vect);
		final boolean improves = min ? (value < best) : (value > best);
		if (improves) {
			best = value;
			bestChoice = choice;
		}
	}
	// If strategy generation is enabled, store optimal choice
	// (for max, only remember strictly better choices)
	if (strat != null) {
		if (min || strat[s] == -1 || best > vect[s]) {
			strat[s] = bestChoice;
		}
	}
	return best;
}
/**
 * Compute a single row of Jacobi-style matrix-vector multiplication for a specific choice,
 * i.e. return (sum_{j!=s} P_i(s,j)*vect[j]) / (1-P_i(s,s)).
 * <p>
 * The division is only carried out if 1-P_i(s,s) is strictly positive;
 * otherwise the plain sum over the non-self-loop transitions is returned.
 *
 * @param s Row index
 * @param i Choice index
 * @param vect Vector to multiply by
 * @return the Jacobi-style value of choice {@code i} in state {@code s}
 */
public default double mvMultJacSingle(int s, int i, double vect[])
{
	// acc[0]: weighted sum over successors other than the source state
	// acc[1]: diagonal term, 1 minus the self-loop probability
	final double[] acc = { 0.0, 1.0 };
	forEachTransition(s, i, (from, to, prob) -> {
		if (to == from) {
			acc[1] -= prob;
		} else {
			acc[0] += prob * vect[to];
		}
	});
	final double offDiagonalSum = acc[0];
	final double diagonal = acc[1];
	return (diagonal > 0) ? offDiagonalSum / diagonal : offDiagonalSum;
}
/**
 * Do a matrix-vector multiplication and sum of rewards followed by min/max, i.e. one step of value iteration.
 * i.e. for all s: result[s] = min/max_k { rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j] }
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * The rows to process are described by {@code subset}/{@code complement}; the actual work
 * is delegated to the overload taking an iterator over states, in the same way as the
 * other {@code BitSet}-based convenience methods of this interface do.
 *
 * @param vect Vector to multiply by
 * @param mdpRewards The rewards
 * @param min Min or max for (true=min, false=max)
 * @param result Vector to store result in
 * @param subset Only do multiplication for these rows (ignored if null)
 * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 */
public default void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int strat[])
{
	mvMultRewMinMax(vect, mdpRewards, min, result, new IterableStateSet(subset, getNumStates(), complement).iterator(), strat);
}
/**
 * Perform one step of value iteration with rewards: a matrix-vector multiplication
 * and sum of rewards followed by min/max,
 * i.e. for all s: result[s] = min/max_k { rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j] }
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * Each row is computed by {@code mvMultRewMinMaxSingle}; the iterator is consumed completely.
 *
 * @param vect Vector to multiply by
 * @param mdpRewards The rewards
 * @param min Min or max for (true=min, false=max)
 * @param result Vector to store result in
 * @param states Perform computation for these rows, in the iteration order
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 */
public default void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], PrimitiveIterator.OfInt states, int strat[])
{
	for (; states.hasNext();) {
		final int row = states.nextInt();
		final double value = mvMultRewMinMaxSingle(row, vect, mdpRewards, min, strat);
		result[row] = value;
	}
}
/**
 * Compute one row of a matrix-vector multiplication and sum of rewards followed by min/max,
 * i.e. return min/max_k { rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j] }
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * Ties between choices are resolved in favour of the lowest choice index.
 * If state {@code s} has no choices, 0 is returned and {@code strat} is left untouched.
 * When maximising, an existing entry {@code strat[s]} is only replaced if it is -1
 * or if the new value is strictly greater than {@code vect[s]}.
 *
 * @param s Row index
 * @param vect Vector to multiply by
 * @param mdpRewards The rewards
 * @param min Min or max for (true=min, false=max)
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @return the min/max value over all choices of state {@code s}
 */
public default double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])
{
	final int numChoices = getNumChoices(s);
	if (numChoices <= 0) {
		// nothing to optimise over
		return 0.0;
	}
	// Start from the first choice, then look for strictly better ones
	double best = mvMultRewSingle(s, 0, vect, mdpRewards);
	int bestChoice = 0;
	for (int choice = 1; choice < numChoices; choice++) {
		final double value = mvMultRewSingle(s, choice, vect, mdpRewards);
		final boolean improves = min ? (value < best) : (value > best);
		if (improves) {
			best = value;
			bestChoice = choice;
		}
	}
	// If strategy generation is enabled, store optimal choice
	// (for max, only remember strictly better choices)
	if (strat != null) {
		if (min || strat[s] == -1 || best > vect[s]) {
			strat[s] = bestChoice;
		}
	}
	return best;
}
/**
 * Compute a single row of matrix-vector multiplication and sum of rewards for a specific choice,
 * i.e. rew(s) + rew_i(s) + sum_j P_i(s,j)*vect[j]
 *
 * @param s State (row) index
 * @param i Choice index
 * @param vect Vector to multiply by
 * @param mdpRewards The rewards (MDP rewards)
 * @return state reward plus transition reward plus the weighted sum over the successors
 */
public default double mvMultRewSingle(int s, int i, double vect[], MDPRewards mdpRewards)
{
	// rew(s) + rew_i(s)
	final double reward = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, i);
	// sum_j P_i(s,j)*vect[j]
	final double expected = sumOverTransitions(s, i, (from, to, prob) -> prob * vect[to]);
	return reward + expected;
}
/**
 * Compute a single row of matrix-vector multiplication and sum of rewards for a specific choice,
 * using Markov chain rewards,
 * i.e. rew(s) + sum_j P_i(s,j)*vect[j]
 * <p>
 * Only the state reward is taken into account; no transition reward is added.
 *
 * @param s State (row) index
 * @param i Choice index
 * @param vect Vector to multiply by
 * @param mcRewards The rewards (DTMC rewards)
 * @return state reward plus the weighted sum over the successors
 */
public default double mvMultRewSingle(int s, int i, double vect[], MCRewards mcRewards)
{
	final double stateReward = mcRewards.getStateReward(s);
	// TODO: add transition rewards when added to MCRewards
	final double expected = sumOverTransitions(s, i, (from, to, prob) -> prob * vect[to]);
	return stateReward + expected;
}
/**
 * Perform a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max,
 * i.e. for all s: vect[s] = min/max_k { (rew(s) + rew_k(s) + sum_{j!=s} P_k(s,j)*vect[j]) / (1-P_k(s,s)) }
 * with new values written straight into {@code vect} as they are computed.
 * The maximum (absolute/relative) difference between old/new
 * elements of {@code vect} is also returned.
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * The rows to process are described by {@code subset}/{@code complement}; the actual work
 * is delegated to the overload taking an iterator over states.
 *
 * @param vect Vector to multiply by (and store the result in)
 * @param mdpRewards The rewards
 * @param min Min or max for (true=min, false=max)
 * @param subset Only do multiplication for these rows (ignored if null)
 * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
 * @param absolute If true, compute absolute, rather than relative, difference
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @return The maximum difference between old/new elements of {@code vect}
 */
public default double mvMultRewGSMinMax(double vect[], MDPRewards mdpRewards, boolean min, BitSet subset, boolean complement, boolean absolute, int strat[])
{
	final PrimitiveIterator.OfInt rows = new IterableStateSet(subset, getNumStates(), complement).iterator();
	return mvMultRewGSMinMax(vect, mdpRewards, min, rows, absolute, strat);
}
/**
 * Do a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max,
 * i.e. for all s: vect[s] = min/max_k { (rew(s) + rew_k(s) + sum_{j!=s} P_k(s,j)*vect[j]) / (1-P_k(s,s)) }
 * and store new values directly in {@code vect} as computed.
 * The maximum (absolute/relative) difference between old/new
 * elements of {@code vect} is also returned.
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * The relative difference is {@code |new - old| / |new|}. Dividing by the magnitude
 * of the new value keeps the difference non-negative even when the new value is
 * negative, so such rows are not silently dropped from the maximum.
 *
 * @param vect Vector to multiply by (and store the result in)
 * @param mdpRewards The rewards
 * @param min Min or max for (true=min, false=max)
 * @param states Perform computation for these rows, in the iteration order
 * @param absolute If true, compute absolute, rather than relative, difference
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @return The maximum difference between old/new elements of {@code vect}
 */
public default double mvMultRewGSMinMax(double vect[], MDPRewards mdpRewards, boolean min, PrimitiveIterator.OfInt states, boolean absolute, int strat[])
{
	double maxDiff = 0.0;
	while (states.hasNext()) {
		final int s = states.nextInt();
		final double d = mvMultRewJacMinMaxSingle(s, vect, mdpRewards, min, strat);
		double diff = Math.abs(d - vect[s]);
		if (!absolute) {
			// relative to the magnitude (not the signed value) of the new entry
			diff /= Math.abs(d);
		}
		// a NaN difference (e.g. 0/0 or infinity minus infinity) never compares
		// greater and is thus skipped
		if (diff > maxDiff) {
			maxDiff = diff;
		}
		// Gauss-Seidel: make the new value visible to subsequent rows immediately
		vect[s] = d;
	}
	return maxDiff;
}
/**
 * Perform a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max,
 * for interval iteration,
 * i.e. for all s: vect[s] = min/max_k { (rew(s) + rew_k(s) + sum_{j!=s} P_k(s,j)*vect[j]) / (1-P_k(s,s)) }
 * with new values written straight into {@code vect} as they are computed.
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * With {@code ensureMonotonic}, a row never moves in the wrong direction: when iterating
 * from below the larger of old and new value is kept, when iterating from above the smaller.
 *
 * @param vect Vector to multiply by (and store the result in)
 * @param mdpRewards The rewards
 * @param min Min or max for (true=min, false=max)
 * @param states Perform computation for these rows, in the iteration order
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @param ensureMonotonic enforce monotonicity?
 * @param fromBelow interval iteration from below? (only relevant with {@code ensureMonotonic})
 */
public default void mvMultRewGSMinMaxIntervalIter(double vect[], MDPRewards mdpRewards, boolean min, PrimitiveIterator.OfInt states, int strat[], boolean ensureMonotonic, boolean fromBelow)
{
	while (states.hasNext()) {
		final int row = states.nextInt();
		final double updated = mvMultRewJacMinMaxSingle(row, vect, mdpRewards, min, strat);
		final double previous = vect[row];
		if (!ensureMonotonic) {
			vect[row] = updated;
		} else if (fromBelow) {
			// from below: keep the maximum of old and new
			vect[row] = (previous > updated) ? previous : updated;
		} else {
			// from above: keep the minimum of old and new
			vect[row] = (previous < updated) ? previous : updated;
		}
	}
}
/**
 * Compute one row of a Jacobi-style matrix-vector multiplication and sum of rewards followed by min/max,
 * i.e. return the min/max over all choices k of the value that
 * {@code mvMultRewJacSingle(s, k, vect, mdpRewards)} yields.
 * Optionally, store optimal (memoryless) strategy info.
 * <p>
 * Ties between choices are resolved in favour of the lowest choice index.
 * If state {@code s} has no choices, 0 is returned and {@code strat} is left untouched.
 * When maximising, an existing entry {@code strat[s]} is only replaced if it is -1
 * or if the new value is strictly greater than {@code vect[s]}.
 *
 * @param s State (row) index
 * @param vect Vector to multiply by
 * @param mdpRewards The rewards
 * @param min Min or max for (true=min, false=max)
 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 * @return the min/max value over all choices of state {@code s}
 */
public default double mvMultRewJacMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])
{
	final int numChoices = getNumChoices(s);
	if (numChoices <= 0) {
		// nothing to optimise over
		return 0.0;
	}
	// Start from the first choice, then look for strictly better ones
	double best = mvMultRewJacSingle(s, 0, vect, mdpRewards);
	int bestChoice = 0;
	for (int choice = 1; choice < numChoices; choice++) {
		final double value = mvMultRewJacSingle(s, choice, vect, mdpRewards);
		final boolean improves = min ? (value < best) : (value > best);
		if (improves) {
			best = value;
			bestChoice = choice;
		}
	}
	// If strategy generation is enabled, store optimal choice
	// (for max, only remember strictly better choices)
	if (strat != null) {
		if (min || strat[s] == -1 || best > vect[s]) {
			strat[s] = bestChoice;
		}
	}
	return best;
}
/**
 * Compute a single row of Jacobi-style matrix-vector multiplication and sum of rewards,
 * for a specific choice,
 * i.e. return (rew(s) + rew_i(s) + sum_{j!=s} P_i(s,j)*vect[j]) / (1-P_i(s,s)).
 * <p>
 * Special cases:
 * <ul>
 * <li>If the choice has no transition to a state other than {@code s} (this includes
 * a choice without any transitions), the result is 0 for a zero reward and
 * plus/minus infinity for a non-zero reward, following the sign of the reward.</li>
 * <li>Otherwise, the division is only carried out if 1-P_i(s,s) is strictly positive.</li>
 * </ul>
 *
 * @param s State (row) index
 * @param i the choice index
 * @param vect Vector to multiply by
 * @param mdpRewards The rewards
 * @return the Jacobi-style value of choice {@code i} in state {@code s}
 */
public default double mvMultRewJacSingle(int s, int i, double vect[], MDPRewards mdpRewards)
{
	// acc[0]: rewards plus weighted sum over successors other than the source state
	// acc[1]: diagonal term, 1 minus the self-loop probability
	final double[] acc = { mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, i), 1.0 };
	// becomes true once a transition to a different state has been seen
	final boolean[] leavesState = { false };
	forEachTransition(s, i, (from, to, prob) -> {
		if (to == from) {
			acc[1] -= prob;
		} else {
			acc[0] += prob * vect[to];
			leavesState[0] = true;
		}
	});
	final double numerator = acc[0];
	final double diagonal = acc[1];
	if (!leavesState[0]) {
		if (numerator != 0) {
			// always choosing the selfloop-action will produce infinite reward
			return (numerator > 0) ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY;
		}
		// no reward & only self-loops: value is 0
		return 0.0;
	}
	// not only self-loops, do Jacobi division
	return (diagonal > 0) ? numerator / diagonal : numerator;
}
/**
 * Collect the choices of state {@code s} whose value, after a single row of
 * matrix-vector multiplication and sum of rewards, matches {@code val}.
 * <p>
 * Matching is decided by {@code PrismUtils.doublesAreClose(val, d, 1e-12, false)}
 * (NOTE(review): the final {@code false} presumably selects a relative comparison;
 * confirm against PrismUtils). The {@code min} flag is not consulted here.
 *
 * @param s State (row) index
 * @param vect Vector to multiply by
 * @param mdpRewards The rewards
 * @param min Min or max (true=min, false=max)
 * @param val Min or max value to match
 * @return the indices of the matching choices, in increasing order
 */
public default List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val)
{
	final List<Integer> matching = new ArrayList<>();
	final int numChoices = getNumChoices(s);
	int choice = 0;
	while (choice < numChoices) {
		// Value of this choice, including rewards
		final double value = mvMultRewSingle(s, choice, vect, mdpRewards);
		if (PrismUtils.doublesAreClose(val, value, 1e-12, false)) {
			matching.add(choice);
		}
		choice++;
	}
	return matching;
}
/**
 * Multiply the probability matrix induced by the MDP and {@code strat}
 * to the right of {@code source}, restricted to the rows listed in {@code states}.
 * <p>
 * For every state s in {@code states} and every transition (s,t) of the choice
 * {@code strat[s]}, the product P(s,t)*source[s] is added to {@code dest[t]}.
 * The result is thus accumulated on top of the existing contents of {@code dest}.
 * <p>
 * NOTE(review): successor states t are not checked for membership in {@code states};
 * confirm whether callers rely on a restriction of the columns as well.
 *
 * @param states States for which to multiply
 * @param strat (Memoryless) strategy to use
 * @param source Vector to multiply matrix with
 * @param dest Vector to write result to.
 */
public default void mvMultRight(int[] states, int[] strat, double[] source, double[] dest)
{
	// pushes the probability mass of the source state along one transition
	final TransitionConsumer addContribution = (from, to, prob) -> dest[to] += prob * source[from];
	for (int idx = 0; idx < states.length; idx++) {
		final int state = states[idx];
		forEachTransition(state, strat[state], addContribution);
	}
}
}