You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

933 lines
33 KiB

//==============================================================================
//
// Copyright (c) 2002-
// Authors:
// * Dave Parker <david.parker@comlab.ox.ac.uk> (University of Oxford)
//
//------------------------------------------------------------------------------
//
// This file is part of PRISM.
//
// PRISM is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// PRISM is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with PRISM; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//==============================================================================
package explicit;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.PrimitiveIterator;
import java.util.TreeMap;
import java.util.PrimitiveIterator.OfInt;
import common.IterableStateSet;
import common.IteratorTools;
import explicit.graphviz.Decorator;
import explicit.rewards.MCRewards;
import explicit.rewards.MDPRewards;
import prism.ModelType;
import prism.PrismException;
import prism.PrismLog;
import prism.PrismUtils;
/**
* Interface for classes that provide (read) access to an explicit-state MDP,
* where the transition probabilities are stored as double floating point values.
* <br>
* For the generic methods, e.g., the prob0 / prob1 precomputations that do not
* care about the concrete values, see {@link explicit.MDPGeneric}.
*/
public interface MDP extends MDPGeneric<Double>
{
// Accessors (for Model) - default implementations
@Override
default ModelType getModelType()
{
return ModelType.MDP;
}
@Override
default void exportToPrismExplicitTra(PrismLog out)
{
// Output transitions to .tra file
int numStates = getNumStates();
out.print(numStates + " " + getNumChoices() + " " + getNumTransitions() + "\n");
TreeMap<Integer, Double> sorted = new TreeMap<Integer, Double>();
for (int i = 0; i < numStates; i++) {
int numChoices = getNumChoices(i);
for (int j = 0; j < numChoices; j++) {
// Extract transitions and sort by destination state index (to match PRISM-exported files)
Iterator<Map.Entry<Integer, Double>> iter = getTransitionsIterator(i, j);
while (iter.hasNext()) {
Map.Entry<Integer, Double> e = iter.next();
sorted.put(e.getKey(), e.getValue());
}
// Print out (sorted) transitions
for (Map.Entry<Integer, Double> e : sorted.entrySet()) {
// Note use of PrismUtils.formatDouble to match PRISM-exported files
out.print(i + " " + j + " " + e.getKey() + " " + PrismUtils.formatDouble(e.getValue()));
Object action = getAction(i, j);
out.print(action == null ? "\n" : (" " + action + "\n"));
}
sorted.clear();
}
}
}
@Override
default void exportTransitionsToDotFile(int i, PrismLog out, Iterable<explicit.graphviz.Decorator> decorators)
{
int j, numChoices;
String nij;
Object action;
numChoices = getNumChoices(i);
for (j = 0; j < numChoices; j++) {
action = getAction(i, j);
nij = "n" + i + "_" + j;
out.print(i + " -> " + nij + " ");
explicit.graphviz.Decoration d = new explicit.graphviz.Decoration();
d.attributes().put("arrowhead", "none");
d.setLabel(j + (action != null ? ":" + action : ""));
if (decorators != null) {
for (Decorator decorator : decorators) {
d = decorator.decorateTransition(i, j, d);
}
}
out.print(d);
out.println(";");
out.print(nij + " [ shape=point,width=0.1,height=0.1,label=\"\" ];\n");
Iterator<Map.Entry<Integer, Double>> iter = getTransitionsIterator(i, j);
while (iter.hasNext()) {
Map.Entry<Integer, Double> e = iter.next();
out.print(nij + " -> " + e.getKey() + " ");
d = new explicit.graphviz.Decoration();
d.setLabel(e.getValue().toString());
if (decorators != null) {
for (Decorator decorator : decorators) {
d = decorator.decorateProbability(i, e.getKey(), j, e.getValue(), d);
}
}
out.print(d);
out.println(";");
}
}
}
@Override
default void exportToPrismLanguage(final String filename) throws PrismException
{
try (FileWriter out = new FileWriter(filename)) {
// Output transitions to PRISM language file
out.write(getModelType().keyword() + "\n");
final int numStates = getNumStates();
out.write("module M\nx : [0.." + (numStates - 1) + "];\n");
final TreeMap<Integer, Double> sorted = new TreeMap<Integer, Double>();
for (int state = 0; state < numStates; state++) {
for (int choice = 0, numChoices = getNumChoices(state); choice < numChoices; choice++) {
// Extract transitions and sort by destination state index (to match PRISM-exported files)
for (Iterator<Entry<Integer, Double>> transitions = getTransitionsIterator(state, choice); transitions.hasNext();) {
final Entry<Integer, Double> trans = transitions.next();
sorted.put(trans.getKey(), trans.getValue());
}
// Print out (sorted) transitions
final Object action = getAction(state, choice);
out.write(action != null ? ("[" + action + "]") : "[]");
out.write("x=" + state + "->");
boolean first = true;
for (Entry<Integer, Double> e : sorted.entrySet()) {
if (first)
first = false;
else
out.write("+");
// Note use of PrismUtils.formatDouble to match PRISM-exported files
out.write(PrismUtils.formatDouble(e.getValue()) + ":(x'=" + e.getKey() + ")");
}
out.write(";\n");
sorted.clear();
}
}
out.write("endmodule\n");
} catch (IOException e) {
throw new PrismException("Could not export " + getModelType() + " to file \"" + filename + "\"" + e);
}
}
@Override
default String infoString()
{
final int numStates = getNumStates();
String s = "";
s += numStates + " states (" + getNumInitialStates() + " initial)";
s += ", " + getNumTransitions() + " transitions";
s += ", " + getNumChoices() + " choices";
s += ", dist max/avg = " + getMaxNumChoices() + "/" + PrismUtils.formatDouble2dp(((double) getNumChoices()) / numStates);
return s;
}
@Override
default String infoStringTable()
{
final int numStates = getNumStates();
String s = "";
s += "States: " + numStates + " (" + getNumInitialStates() + " initial)\n";
s += "Transitions: " + getNumTransitions() + "\n";
s += "Choices: " + getNumChoices() + "\n";
s += "Max/avg: " + getMaxNumChoices() + "/" + PrismUtils.formatDouble2dp(((double) getNumChoices()) / numStates) + "\n";
return s;
}
// Accessors (for NondetModel) - default implementations
@Override
default int getNumTransitions(final int s, final int i)
{
return IteratorTools.count(getTransitionsIterator(s, i));
}
@Override
default void exportToDotFileWithStrat(PrismLog out, BitSet mark, int strat[])
{
int numStates = getNumStates();
out.print("digraph " + getModelType() + " {\nnode [shape=box];\n");
for (int i = 0; i < numStates; i++) {
if (mark != null && mark.get(i))
out.print(i + " [style=filled fillcolor=\"#cccccc\"]\n");
int numChoices = getNumChoices(i);
for (int j = 0; j < numChoices; j++) {
String style = (strat[i] == j) ? ",color=\"#ff0000\",fontcolor=\"#ff0000\"" : "";
Object action = getAction(i, j);
String nij = "n" + i + "_" + j;
out.print(i + " -> " + nij + " [ arrowhead=none,label=\"" + j);
if (action != null)
out.print(":" + action);
out.print("\"" + style + " ];\n");
out.print(nij + " [ shape=point,height=0.1,label=\"\"" + style + " ];\n");
Iterator<Map.Entry<Integer, Double>> iter = getTransitionsIterator(i, j);
while (iter.hasNext()) {
Map.Entry<Integer, Double> e = iter.next();
out.print(nij + " -> " + e.getKey() + " [ label=\"" + e.getValue() + "\"" + style + " ];\n");
}
}
}
out.print("}\n");
}
// Accessors
/**
* Get an iterator over the transitions from choice {@code i} of state {@code s}.
*/
public Iterator<Entry<Integer, Double>> getTransitionsIterator(int s, int i);
/**
* Functional interface for a consumer,
* accepting transitions (s,t,d), i.e.,
* from state s to state t with value d.
*/
@FunctionalInterface
public interface TransitionConsumer {
void accept(int s, int t, double d);
}
/**
* Iterate over the outgoing transitions of state {@code s} and choice {@code i}
* and call the accept method of the consumer for each of them:
* <br>
* Call {@code accept(s,t,d)} where t is the successor state d = P(s,i,t)
* is the probability from s to t with choice i.
* <p>
* <i>Default implementation</i>: The default implementation relies on iterating over the
* iterator returned by {@code getTransitionsIterator()}.
* <p><i>Note</i>: This method is the base for the default implementation of the numerical
* computation methods (mvMult, etc). In derived classes, it may thus be worthwhile to
* provide a specialised implementation for this method that avoids using the Iterator mechanism.
*
* @param s the state s
* @param i the choice i
* @param c the consumer
*/
public default void forEachTransition(int s, int i, TransitionConsumer c)
{
for (Iterator<Entry<Integer, Double>> it = getTransitionsIterator(s, i); it.hasNext(); ) {
Entry<Integer, Double> e = it.next();
c.accept(s, e.getKey(), e.getValue());
}
}
/**
* Functional interface for a function
* mapping transitions (s,t,d), i.e.,
* from state s to state t with value d,
* to a double value.
*/
@FunctionalInterface
public interface TransitionToDoubleFunction {
double apply(int s, int t, double d);
}
/**
* Iterate over the outgoing transitions of state {@code s} and choice {@code i},
* call the function {@code f} and return the sum of the result values:
* <br>
* Return sum_t f(s, t, P(s,i,t)), where t ranges over the i-successors of s.
*
* @param s the state s
* @param c the consumer
*/
public default double sumOverTransitions(final int s, final int i, final TransitionToDoubleFunction f)
{
class Sum {
double sum = 0.0;
void accept(int s, int t, double d)
{
sum += f.apply(s, t, d);
}
}
Sum sum = new Sum();
forEachTransition(s, i, sum::accept);
return sum.sum;
}
/**
* Do a matrix-vector multiplication followed by min/max, i.e. one step of value iteration,
* i.e. for all s: result[s] = min/max_k { sum_j P_k(s,j)*vect[j] }
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by
* @param min Min or max for (true=min, false=max)
* @param result Vector to store result in
* @param subset Only do multiplication for these rows (ignored if null)
* @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default void mvMultMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int strat[])
{
mvMultMinMax(vect, min, result, new IterableStateSet(subset, getNumStates(), complement).iterator(), strat);
}
/**
* Do a matrix-vector multiplication followed by min/max, i.e. one step of value iteration,
* i.e. for all s: result[s] = min/max_k { sum_j P_k(s,j)*vect[j] }
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by
* @param min Min or max for (true=min, false=max)
* @param result Vector to store result in
* @param states Perform computation for these rows, in the iteration order
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default void mvMultMinMax(double vect[], boolean min, double result[], PrimitiveIterator.OfInt states, int strat[])
{
while (states.hasNext()) {
final int s = states.nextInt();
result[s] = mvMultMinMaxSingle(s, vect, min, strat);
}
}
/**
* Do a single row of matrix-vector multiplication followed by min/max,
* i.e. return min/max_k { sum_j P_k(s,j)*vect[j] }
* Optionally, store optimal (memoryless) strategy info.
* @param s Row index
* @param vect Vector to multiply by
* @param min Min or max for (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default double mvMultMinMaxSingle(int s, double vect[], boolean min, int strat[])
{
int stratCh = -1;
double minmax = 0;
boolean first = true;
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {
// Compute sum for this distribution
double d = mvMultSingle(s, choice, vect);
// Check whether we have exceeded min/max so far
if (first || (min && d < minmax) || (!min && d > minmax)) {
minmax = d;
// If strategy generation is enabled, remember optimal choice
if (strat != null)
stratCh = choice;
}
first = false;
}
// If strategy generation is enabled, store optimal choice
if (strat != null && !first) {
// For max, only remember strictly better choices
if (min) {
strat[s] = stratCh;
} else if (strat[s] == -1 || minmax > vect[s]) {
strat[s] = stratCh;
}
}
return minmax;
}
/**
* Determine which choices result in min/max after a single row of matrix-vector multiplication.
* @param s Row index
* @param vect Vector to multiply by
* @param min Min or max (true=min, false=max)
* @param val Min or max value to match
*/
public default List<Integer> mvMultMinMaxSingleChoices(int s, double vect[], boolean min, double val)
{
// Create data structures to store strategy
final List<Integer> result = new ArrayList<Integer>();
// One row of matrix-vector operation
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {
// Compute sum for this distribution
double d = mvMultSingle(s, choice, vect);
// Store strategy info if value matches
if (PrismUtils.doublesAreEqual(val, d)) {
result.add(choice);
}
}
return result;
}
/**
* Do a single row of matrix-vector multiplication for a specific choice.
* @param s State (row) index
* @param i Choice index
* @param vect Vector to multiply by
*/
public default double mvMultSingle(int s, int i, double vect[])
{
return sumOverTransitions(s, i, (int __, int t, double prob) -> {
return prob * vect[t];
});
}
/**
* Do a Gauss-Seidel-style matrix-vector multiplication followed by min/max.
* i.e. for all s: vect[s] = min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }
* and store new values directly in {@code vect} as computed.
* The maximum (absolute/relative) difference between old/new
* elements of {@code vect} is also returned.
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by (and store the result in)
* @param min Min or max for (true=min, false=max)
* @param subset Only do multiplication for these rows (ignored if null)
* @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
* @param absolute If true, compute absolute, rather than relative, difference
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* @return The maximum difference between old/new elements of {@code vect}
*/
public default double mvMultGSMinMax(double vect[], boolean min, BitSet subset, boolean complement, boolean absolute, int strat[])
{
return mvMultGSMinMax(vect, min, new IterableStateSet(subset, getNumStates(), complement).iterator(), absolute, strat);
}
/**
* Do a Gauss-Seidel-style matrix-vector multiplication followed by min/max.
* i.e. for all s: vect[s] = min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }
* and store new values directly in {@code vect} as computed.
* The maximum (absolute/relative) difference between old/new
* elements of {@code vect} is also returned.
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by (and store the result in)
* @param min Min or max for (true=min, false=max)
* @param states Perform computation for these rows, in the iteration order
* @param absolute If true, compute absolute, rather than relative, difference
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* @return The maximum difference between old/new elements of {@code vect}
*/
public default double mvMultGSMinMax(double vect[], boolean min, PrimitiveIterator.OfInt states, boolean absolute, int strat[])
{
double d, diff, maxDiff = 0.0;
while (states.hasNext()) {
final int s = states.nextInt();
d = mvMultJacMinMaxSingle(s, vect, min, strat);
diff = absolute ? (Math.abs(d - vect[s])) : (Math.abs(d - vect[s]) / d);
maxDiff = diff > maxDiff ? diff : maxDiff;
vect[s] = d;
}
return maxDiff;
}
/**
* Do a Gauss-Seidel-style matrix-vector multiplication followed by min/max in the context of interval iteration.
* i.e. for all s: vect[s] = min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }
* and store new values directly in {@code vect} as computed.
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by (and store the result in)
* @param min Min or max for (true=min, false=max)
* @param subset Only do multiplication for these rows (ignored if null)
* @param states Perform computation for these rows, in the iteration order
* @param ensureMonotonic ensure monotonicity
* @param fromBelow iteration from below or from above? (for ensureMonotonicity)
*/
public default void mvMultGSMinMaxIntervalIter(double vect[], boolean min, PrimitiveIterator.OfInt states, int strat[], boolean ensureMonotonic, boolean fromBelow)
{
double d;
while (states.hasNext()) {
final int s = states.nextInt();
d = mvMultJacMinMaxSingle(s, vect, min, strat);
if (ensureMonotonic) {
if (fromBelow) {
// from below: do max old and new
if (vect[s] > d) {
d = vect[s];
}
} else {
// from above: do min old and new
if (vect[s] < d) {
d = vect[s];
}
}
vect[s] = d;
} else {
vect[s] = d;
}
}
}
/**
* Do a single row of Jacobi-style matrix-vector multiplication followed by min/max.
* i.e. return min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }
* Optionally, store optimal (memoryless) strategy info.
* @param s Row index
* @param vect Vector to multiply by
* @param min Min or max for (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default double mvMultJacMinMaxSingle(int s, double vect[], boolean min, int strat[])
{
int stratCh = -1;
double minmax = 0;
boolean first = true;
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {
double d = mvMultJacSingle(s, choice, vect);
// Check whether we have exceeded min/max so far
if (first || (min && d < minmax) || (!min && d > minmax)) {
minmax = d;
// If strategy generation is enabled, remember optimal choice
if (strat != null) {
stratCh = choice;
}
}
first = false;
}
// If strategy generation is enabled, store optimal choice
if (strat != null && !first) {
// For max, only remember strictly better choices
if (min) {
strat[s] = stratCh;
} else if (strat[s] == -1 || minmax > vect[s]) {
strat[s] = stratCh;
}
}
return minmax;
}
/**
* Do a single row of Jacobi-style matrix-vector multiplication for a specific choice.
* i.e. return min/max_k { (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }
* @param s Row index
* @param i Choice index
* @param vect Vector to multiply by
*/
public default double mvMultJacSingle(int s, int i, double vect[])
{
class Jacobi {
double diag = 1.0;
double d = 0.0;
void accept(int s, int t, double prob) {
if (t != s) {
d += prob * vect[t];
} else {
diag -= prob;
}
}
}
Jacobi jac = new Jacobi();
forEachTransition(s, i, jac::accept);
double d = jac.d;
double diag = jac.diag;
if (diag > 0)
d /= diag;
return d;
}
/**
* Do a matrix-vector multiplication and sum of rewards followed by min/max, i.e. one step of value iteration.
* i.e. for all s: result[s] = min/max_k { rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j] }
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by
* @param mdpRewards The rewards
* @param min Min or max for (true=min, false=max)
* @param result Vector to store result in
* @param subset Only do multiplication for these rows (ignored if null)
* @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int strat[])
{
for (OfInt it = new IterableStateSet(subset, getNumStates(), complement).iterator(); it.hasNext();) {
final int s = it.nextInt();
result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, strat);
}
}
/**
* Do a matrix-vector multiplication and sum of rewards followed by min/max, i.e. one step of value iteration.
* i.e. for all s: result[s] = min/max_k { rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j] }
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by
* @param mdpRewards The rewards
* @param min Min or max for (true=min, false=max)
* @param result Vector to store result in
* @param states Perform computation for these rows, in the iteration order
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], PrimitiveIterator.OfInt states, int strat[])
{
while (states.hasNext()) {
final int s = states.nextInt();
result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, strat);
}
}
/**
* Do a single row of matrix-vector multiplication and sum of rewards followed by min/max.
* i.e. return min/max_k { rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j] }
* Optionally, store optimal (memoryless) strategy info.
* @param s Row index
* @param vect Vector to multiply by
* @param mdpRewards The rewards
* @param min Min or max for (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])
{
int stratCh = -1;
double minmax = 0;
boolean first = true;
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {
double d = mvMultRewSingle(s, choice, vect, mdpRewards);
// Check whether we have exceeded min/max so far
if (first || (min && d < minmax) || (!min && d > minmax)) {
minmax = d;
// If strategy generation is enabled, remember optimal choice
if (strat != null)
stratCh = choice;
}
first = false;
}
// If strategy generation is enabled, store optimal choice
if (strat != null && !first) {
// For max, only remember strictly better choices
if (min) {
strat[s] = stratCh;
} else if (strat[s] == -1 || minmax > vect[s]) {
strat[s] = stratCh;
}
}
return minmax;
}
/**
* Do a single row of matrix-vector multiplication and sum of rewards for a specific choice.
* i.e. rew(s) + rew_i(s) + sum_j P_i(s,j)*vect[j]
* @param s State (row) index
* @param i Choice index
* @param vect Vector to multiply by
* @param mdpRewards The rewards (MDP rewards)
*/
public default double mvMultRewSingle(int s, int i, double vect[], MDPRewards mdpRewards)
{
double d = mdpRewards.getStateReward(s);
d += mdpRewards.getTransitionReward(s, i);
d += sumOverTransitions(s, i, (__, t, prob) -> {
return prob * vect[t];
});
return d;
}
/**
* Do a single row of matrix-vector multiplication and sum of rewards for a specific choice.
* i.e. rew(s) + rew_k(s) + sum_j P_k(s,j)*vect[j]
* @param s State (row) index
* @param i Choice index
* @param vect Vector to multiply by
* @param mcRewards The rewards (DTMC rewards)
*/
public default double mvMultRewSingle(int s, int i, double vect[], MCRewards mcRewards)
{
double d = mcRewards.getStateReward(s);
// TODO: add transition rewards when added to MCRewards
d += sumOverTransitions(s, i, (__, t, prob) -> {
return prob * vect[t];
});
return d;
}
/**
* Do a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max.
* i.e. for all s: vect[s] = min/max_k { rew(s) + rew_k(s) + (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }
* and store new values directly in {@code vect} as computed.
* The maximum (absolute/relative) difference between old/new
* elements of {@code vect} is also returned.
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by (and store the result in)
* @param mdpRewards The rewards
* @param min Min or max for (true=min, false=max)
* @param subset Only do multiplication for these rows (ignored if null)
* @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
* @param absolute If true, compute absolute, rather than relative, difference
* @return The maximum difference between old/new elements of {@code vect}
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default double mvMultRewGSMinMax(double vect[], MDPRewards mdpRewards, boolean min, BitSet subset, boolean complement, boolean absolute, int strat[])
{
return mvMultRewGSMinMax(vect, mdpRewards, min, new IterableStateSet(subset, getNumStates(), complement).iterator(), absolute, strat);
}
/**
* Do a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max.
* i.e. for all s: vect[s] = min/max_k { rew(s) + rew_k(s) + (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }
* and store new values directly in {@code vect} as computed.
* The maximum (absolute/relative) difference between old/new
* elements of {@code vect} is also returned.
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by (and store the result in)
* @param mdpRewards The rewards
* @param min Min or max for (true=min, false=max)
* @param states Perform computation for these rows, in the iteration order
* @param absolute If true, compute absolute, rather than relative, difference
* @return The maximum difference between old/new elements of {@code vect}
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default double mvMultRewGSMinMax(double vect[], MDPRewards mdpRewards, boolean min, PrimitiveIterator.OfInt states, boolean absolute, int strat[])
{
double d, diff, maxDiff = 0.0;
while (states.hasNext()) {
final int s = states.nextInt();
d = mvMultRewJacMinMaxSingle(s, vect, mdpRewards, min, strat);
diff = absolute ? (Math.abs(d - vect[s])) : (Math.abs(d - vect[s]) / d);
maxDiff = diff > maxDiff ? diff : maxDiff;
vect[s] = d;
}
return maxDiff;
}
/**
* Do a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max,
* for interval iteration.
* i.e. for all s: vect[s] = min/max_k { rew(s) + rew_k(s) + (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }
* and store new values directly in {@code vect} as computed.
* Optionally, store optimal (memoryless) strategy info.
* @param vect Vector to multiply by (and store the result in)
* @param mdpRewards The rewards
* @param min Min or max for (true=min, false=max)
* @param states Perform computation for these rows, in the iteration order
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
* @param ensureMonotonic enforce monotonicity?
* @param fromBelow interval iteration from below? (for ensureMonotonic)
*/
public default void mvMultRewGSMinMaxIntervalIter(double vect[], MDPRewards mdpRewards, boolean min, PrimitiveIterator.OfInt states, int strat[], boolean ensureMonotonic, boolean fromBelow)
{
double d;
while (states.hasNext()) {
final int s = states.nextInt();
d = mvMultRewJacMinMaxSingle(s, vect, mdpRewards, min, strat);
if (ensureMonotonic) {
if (fromBelow) {
// from below: do max old and new
if (vect[s] > d) {
d = vect[s];
}
} else {
// from above: do min old and new
if (vect[s] < d) {
d = vect[s];
}
}
vect[s] = d;
} else {
vect[s] = d;
}
}
}
/**
* Do a single row of Jacobi-style matrix-vector multiplication and sum of rewards followed by min/max.
* i.e. return min/max_k { rew(s) + rew_k(s) + (sum_{j!=s} P_k(s,j)*vect[j]) / 1-P_k(s,s) }
* Optionally, store optimal (memoryless) strategy info.
* @param s State (row) index
* @param vect Vector to multiply by
* @param mdpRewards The rewards
* @param min Min or max for (true=min, false=max)
* @param strat Storage for (memoryless) strategy choice indices (ignored if null)
*/
public default double mvMultRewJacMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])
{
int stratCh = -1;
double minmax = 0;
boolean first = true;
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {
double d = mvMultRewJacSingle(s, choice, vect, mdpRewards);
// Check whether we have exceeded min/max so far
if (first || (min && d < minmax) || (!min && d > minmax)) {
minmax = d;
// If strategy generation is enabled, remember optimal choice
if (strat != null) {
stratCh = choice;
}
}
first = false;
}
// If strategy generation is enabled, store optimal choice
if (strat != null && !first) {
// For max, only remember strictly better choices
if (min) {
strat[s] = stratCh;
} else if (strat[s] == -1 || minmax > vect[s]) {
strat[s] = stratCh;
}
}
return minmax;
}
/**
* Do a single row of Jacobi-style matrix-vector multiplication and sum of rewards,
* for a specific choice.
* i.e. return rew(s) + rew_i(s) + (sum_{j!=s} P_i(s,j)*vect[j]) / 1-P_i(s,s) }
* @param s State (row) index
* @param i the choice index
* @param vect Vector to multiply by
* @param mdpRewards The rewards
*/
public default double mvMultRewJacSingle(int s, int i, double vect[], MDPRewards mdpRewards)
{
class Jacobi {
double diag = 1.0;
double d = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, i);
boolean onlySelfLoops = true;
void accept(int s, int t, double prob) {
if (t != s) {
d += prob * vect[t];
onlySelfLoops = false;
} else {
diag -= prob;
}
}
}
Jacobi jac = new Jacobi();
forEachTransition(s, i, jac::accept);
double d = jac.d;
double diag = jac.diag;
if (jac.onlySelfLoops) {
if (d != 0) {
// always choosing the selfloop-action will produce infinite reward
d = (d > 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY);
} else {
// no reward & only self-loops: d remains 0
d = 0;
}
} else {
// not only self-loops, do Jacobi division
if (diag > 0)
d /= diag;
}
return d;
}
/**
* Determine which choices result in min/max after a single row of matrix-vector multiplication and sum of rewards.
* @param s State (row) index
* @param vect Vector to multiply by
* @param mdpRewards The rewards
* @param min Min or max (true=min, false=max)
* @param val Min or max value to match
*/
public default List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val)
{
// Create data structures to store strategy
final List<Integer> result = new ArrayList<Integer>();
// One row of matrix-vector operation
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) {
double d = mvMultRewSingle(s, choice, vect, mdpRewards);
// Store strategy info if value matches
if (PrismUtils.doublesAreEqual(val, d)) {
result.add(choice);
}
}
return result;
}
/**
* Multiply the probability matrix induced by the MDP and {@code strat}
* to the right of {@code source}. Only those entries in {@code source}
* and only those columns in the probability matrix are considered, that
* are elements of {@code states}.
*
* The result of this multiplication is added to the contents of {@code dest}.
*
* @param states States for which to multiply
* @param strat (Memoryless) strategy to use
* @param source Vector to multiply matrix with
* @param dest Vector to write result to.
*/
public default void mvMultRight(int[] states, int[] strat, double[] source, double[] dest)
{
for (int state : states) {
forEachTransition(state, strat[state], (int s, int t, double prob) -> {
dest[t] += prob * source[s];
});
}
}
}