|
|
|
@@ -26,6 +26,7 @@
|
|
|
|
|
|
|
package explicit; |
|
|
|
|
|
|
|
import java.util.ArrayList; |
|
|
|
import java.util.BitSet; |
|
|
|
import java.util.Iterator; |
|
|
|
import java.util.List; |
|
|
|
@@ -35,6 +36,7 @@ import java.util.PrimitiveIterator.OfInt;
|
|
|
import common.IterableStateSet; |
|
|
|
import explicit.rewards.MCRewards; |
|
|
|
import explicit.rewards.MDPRewards; |
|
|
|
import prism.PrismUtils; |
|
|
|
|
|
|
|
/** |
|
|
|
* Interface for classes that provide (read) access to an explicit-state MDP. |
|
|
|
@@ -289,7 +291,37 @@ public interface MDP extends NondetModel
|
|
|
* @param min Min or max (true=min, false=max)
|
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null) |
|
|
|
*/ |
|
|
|
public double mvMultMinMaxSingle(int s, double vect[], boolean min, int strat[]); |
|
|
|
public default double mvMultMinMaxSingle(int s, double vect[], boolean min, int strat[]) |
|
|
|
{ |
|
|
|
int stratCh = -1; |
|
|
|
double minmax = 0; |
|
|
|
boolean first = true; |
|
|
|
|
|
|
|
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) { |
|
|
|
// Compute sum for this distribution |
|
|
|
double d = mvMultSingle(s, choice, vect); |
|
|
|
|
|
|
|
// Check whether we have exceeded min/max so far |
|
|
|
if (first || (min && d < minmax) || (!min && d > minmax)) { |
|
|
|
minmax = d; |
|
|
|
// If strategy generation is enabled, remember optimal choice |
|
|
|
if (strat != null) |
|
|
|
stratCh = choice; |
|
|
|
} |
|
|
|
first = false; |
|
|
|
} |
|
|
|
// If strategy generation is enabled, store optimal choice |
|
|
|
if (strat != null && !first) { |
|
|
|
// For max, only remember strictly better choices |
|
|
|
if (min) { |
|
|
|
strat[s] = stratCh; |
|
|
|
} else if (strat[s] == -1 || minmax > vect[s]) { |
|
|
|
strat[s] = stratCh; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return minmax; |
|
|
|
} |
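For orientation, one sweep of value iteration built on this method could look like the sketch below (an assumed helper, not part of this change). Note that the strictly-better rule above tests strat[s] == -1, so a strategy array is expected to be filled with -1 once, before the first sweep.

static void sweep(MDP mdp, double[] vect, double[] result, boolean min, int[] strat)
{
	// One Bellman backup per state; optimal choice indices land in strat
	for (int s = 0, n = mdp.getNumStates(); s < n; s++) {
		result[s] = mdp.mvMultMinMaxSingle(s, vect, min, strat);
	}
}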
|
|
|
|
|
|
|
/** |
|
|
|
* Determine which choices result in min/max after a single row of matrix-vector multiplication. |
|
|
|
@@ -298,7 +330,23 @@ public interface MDP extends NondetModel
|
|
|
* @param min Min or max (true=min, false=max) |
|
|
|
* @param val Min or max value to match |
|
|
|
*/ |
|
|
|
public List<Integer> mvMultMinMaxSingleChoices(int s, double vect[], boolean min, double val); |
|
|
|
public default List<Integer> mvMultMinMaxSingleChoices(int s, double vect[], boolean min, double val) |
|
|
|
{ |
|
|
|
// Create data structures to store strategy |
|
|
|
final List<Integer> result = new ArrayList<Integer>(); |
|
|
|
// One row of matrix-vector operation |
|
|
|
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) { |
|
|
|
// Compute sum for this distribution |
|
|
|
double d = mvMultSingle(s, choice, vect); |
|
|
|
|
|
|
|
// Store strategy info if value matches |
|
|
|
if (PrismUtils.doublesAreClose(val, d, 1e-12, false)) { |
|
|
|
result.add(choice); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return result; |
|
|
|
} |
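A typical caller pairs this with mvMultMinMaxSingle: first compute the optimal value for the state, then collect every choice whose row product matches it. A minimal sketch, assuming mdp, s, vect and min are in scope:

double val = mdp.mvMultMinMaxSingle(s, vect, min, null);
List<Integer> optimalChoices = mdp.mvMultMinMaxSingleChoices(s, vect, min, val);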
|
|
|
|
|
|
|
/** |
|
|
|
* Do a single row of matrix-vector multiplication for a specific choice. |
|
|
|
@@ -306,7 +354,12 @@ public interface MDP extends NondetModel
|
|
|
* @param i Choice index |
|
|
|
* @param vect Vector to multiply by |
|
|
|
*/ |
|
|
|
public double mvMultSingle(int s, int i, double vect[]); |
|
|
|
public default double mvMultSingle(int s, int i, double vect[]) |
|
|
|
{ |
|
|
|
return sumOverTransitions(s, i, (int __, int t, double prob) -> { |
|
|
|
return prob * vect[t]; |
|
|
|
}); |
|
|
|
} |
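In other words, this returns d = sum_t P_i(s,t)*vect[t]; the first lambda parameter (the source state) is unused here, hence the placeholder name __.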
|
|
|
|
|
|
|
/** |
|
|
|
* Do a Gauss-Seidel-style matrix-vector multiplication followed by min/max. |
|
|
|
@@ -354,7 +407,38 @@ public interface MDP extends NondetModel
|
|
|
* @param min Min or max (true=min, false=max)
|
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null) |
|
|
|
*/ |
|
|
|
public double mvMultJacMinMaxSingle(int s, double vect[], boolean min, int strat[]); |
|
|
|
public default double mvMultJacMinMaxSingle(int s, double vect[], boolean min, int strat[]) |
|
|
|
{ |
|
|
|
int stratCh = -1; |
|
|
|
double minmax = 0; |
|
|
|
boolean first = true; |
|
|
|
|
|
|
|
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) { |
|
|
|
double d = mvMultJacSingle(s, choice, vect); |
|
|
|
|
|
|
|
// Check whether we have exceeded min/max so far |
|
|
|
if (first || (min && d < minmax) || (!min && d > minmax)) { |
|
|
|
minmax = d; |
|
|
|
// If strategy generation is enabled, remember optimal choice |
|
|
|
if (strat != null) { |
|
|
|
stratCh = choice; |
|
|
|
} |
|
|
|
} |
|
|
|
first = false; |
|
|
|
} |
|
|
|
// If strategy generation is enabled, store optimal choice |
|
|
|
if (strat != null && !first) { |
|
|
|
// For max, only remember strictly better choices |
|
|
|
if (min) { |
|
|
|
strat[s] = stratCh; |
|
|
|
} else if (strat[s] == -1 || minmax > vect[s]) { |
|
|
|
strat[s] = stratCh; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return minmax; |
|
|
|
|
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Do a single row of Jacobi-style matrix-vector multiplication for a specific choice. |
|
|
|
@@ -363,7 +447,31 @@ public interface MDP extends NondetModel
|
|
|
* @param i Choice index |
|
|
|
* @param vect Vector to multiply by |
|
|
|
*/ |
|
|
|
public double mvMultJacSingle(int s, int i, double vect[]); |
|
|
|
public default double mvMultJacSingle(int s, int i, double vect[]) |
|
|
|
{ |
|
|
|
class Jacobi { |
|
|
|
double diag = 1.0; |
|
|
|
double d = 0.0; |
|
|
|
|
|
|
|
void accept(int s, int t, double prob) { |
|
|
|
if (t != s) { |
|
|
|
d += prob * vect[t]; |
|
|
|
} else { |
|
|
|
diag -= prob; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
Jacobi jac = new Jacobi(); |
|
|
|
forEachTransition(s, i, jac::accept); |
|
|
|
|
|
|
|
double d = jac.d; |
|
|
|
double diag = jac.diag; |
|
|
|
if (diag > 0) |
|
|
|
d /= diag; |
|
|
|
|
|
|
|
return d; |
|
|
|
} |
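The local class is a workaround for Java lambdas only being able to capture effectively final variables: the mutable Jacobi instance accumulates the off-diagonal sum d and the diagonal term diag = 1 - P_i(s,s) in a single pass over the transitions, and the final division yields (sum_{t!=s} P_i(s,t)*vect[t]) / (1 - P_i(s,s)). The diag > 0 guard skips the division when the choice is (numerically) a pure self-loop.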
|
|
|
|
|
|
|
/** |
|
|
|
* Do a matrix-vector multiplication and sum of rewards followed by min/max, i.e. one step of value iteration. |
|
|
|
@@ -395,7 +503,53 @@ public interface MDP extends NondetModel
|
|
|
* @param min Min or max (true=min, false=max)
|
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null) |
|
|
|
*/ |
|
|
|
public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[]); |
|
|
|
public default double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[]) |
|
|
|
{ |
|
|
|
int stratCh = -1; |
|
|
|
double minmax = 0; |
|
|
|
boolean first = true; |
|
|
|
|
|
|
|
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) { |
|
|
|
double d = mvMultRewSingle(s, choice, vect, mdpRewards); |
|
|
|
// Check whether we have exceeded min/max so far |
|
|
|
if (first || (min && d < minmax) || (!min && d > minmax)) { |
|
|
|
minmax = d; |
|
|
|
// If strategy generation is enabled, remember optimal choice |
|
|
|
if (strat != null) |
|
|
|
stratCh = choice; |
|
|
|
} |
|
|
|
first = false; |
|
|
|
} |
|
|
|
// If strategy generation is enabled, store optimal choice |
|
|
|
if (strat != null && !first) { |
|
|
|
// For max, only remember strictly better choices |
|
|
|
if (min) { |
|
|
|
strat[s] = stratCh; |
|
|
|
} else if (strat[s] == -1 || minmax > vect[s]) { |
|
|
|
strat[s] = stratCh; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return minmax; |
|
|
|
} |
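As an equation, this computes opt_i { rew(s) + rew_i(s) + sum_t P_i(s,t)*vect[t] }, where opt is min or max according to the flag, i.e. one step of value iteration for expected total reward in state s.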
|
|
|
|
|
|
|
/** |
|
|
|
* Do a single row of matrix-vector multiplication and sum of rewards for a specific choice. |
|
|
|
* i.e. rew(s) + rew_i(s) + sum_j P_i(s,j)*vect[j] |
|
|
|
* @param s State (row) index |
|
|
|
* @param i Choice index |
|
|
|
* @param vect Vector to multiply by |
|
|
|
* @param mdpRewards The rewards (MDP rewards) |
|
|
|
*/ |
|
|
|
public default double mvMultRewSingle(int s, int i, double vect[], MDPRewards mdpRewards) |
|
|
|
{ |
|
|
|
double d = mdpRewards.getStateReward(s); |
|
|
|
d += mdpRewards.getTransitionReward(s, i); |
|
|
|
d += sumOverTransitions(s, i, (__, t, prob) -> { |
|
|
|
return prob * vect[t]; |
|
|
|
}); |
|
|
|
return d; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Do a single row of matrix-vector multiplication and sum of rewards for a specific choice. |
|
|
|
@@ -403,9 +557,17 @@ public interface MDP extends NondetModel
|
|
|
* @param s State (row) index |
|
|
|
* @param i Choice index |
|
|
|
* @param vect Vector to multiply by |
|
|
|
* @param mcRewards The rewards |
|
|
|
* @param mcRewards The rewards (DTMC rewards) |
|
|
|
*/ |
|
|
|
public double mvMultRewSingle(int s, int i, double vect[], MCRewards mcRewards); |
|
|
|
public default double mvMultRewSingle(int s, int i, double vect[], MCRewards mcRewards) |
|
|
|
{ |
|
|
|
double d = mcRewards.getStateReward(s); |
|
|
|
// TODO: add transition rewards when added to MCRewards |
|
|
|
d += sumOverTransitions(s, i, (__, t, prob) -> { |
|
|
|
return prob * vect[t]; |
|
|
|
}); |
|
|
|
return d; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Do a Gauss-Seidel-style matrix-vector multiplication and sum of rewards followed by min/max. |
|
|
|
@@ -455,7 +617,85 @@ public interface MDP extends NondetModel
|
|
|
* @param min Min or max (true=min, false=max)
|
|
|
* @param strat Storage for (memoryless) strategy choice indices (ignored if null) |
|
|
|
*/ |
|
|
|
public double mvMultRewJacMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[]); |
|
|
|
public default double mvMultRewJacMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[]) |
|
|
|
{ |
|
|
|
int stratCh = -1; |
|
|
|
double minmax = 0; |
|
|
|
boolean first = true; |
|
|
|
|
|
|
|
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) { |
|
|
|
double d = mvMultRewJacSingle(s, choice, vect, mdpRewards); |
|
|
|
// Check whether we have exceeded min/max so far |
|
|
|
if (first || (min && d < minmax) || (!min && d > minmax)) { |
|
|
|
minmax = d; |
|
|
|
// If strategy generation is enabled, remember optimal choice |
|
|
|
if (strat != null) { |
|
|
|
stratCh = choice; |
|
|
|
} |
|
|
|
} |
|
|
|
first = false; |
|
|
|
} |
|
|
|
// If strategy generation is enabled, store optimal choice |
|
|
|
if (strat != null && !first) { |
|
|
|
// For max, only remember strictly better choices |
|
|
|
if (min) { |
|
|
|
strat[s] = stratCh; |
|
|
|
} else if (strat[s] == -1 || minmax > vect[s]) { |
|
|
|
strat[s] = stratCh; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return minmax; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/** |
|
|
|
* Do a single row of Jacobi-style matrix-vector multiplication and sum of rewards, |
|
|
|
* for a specific choice. |
|
|
|
* i.e. return (rew(s) + rew_i(s) + sum_{j!=s} P_i(s,j)*vect[j]) / (1 - P_i(s,s))
|
|
|
* @param s State (row) index |
|
|
|
* @param i Choice index
|
|
|
* @param vect Vector to multiply by |
|
|
|
* @param mdpRewards The rewards (MDP rewards)
|
|
|
*/ |
|
|
|
public default double mvMultRewJacSingle(int s, int i, double vect[], MDPRewards mdpRewards) |
|
|
|
{ |
|
|
|
class Jacobi { |
|
|
|
double diag = 1.0; |
|
|
|
double d = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, i); |
|
|
|
boolean onlySelfLoops = true; |
|
|
|
|
|
|
|
void accept(int s, int t, double prob) { |
|
|
|
if (t != s) { |
|
|
|
d += prob * vect[t]; |
|
|
|
onlySelfLoops = false; |
|
|
|
} else { |
|
|
|
diag -= prob; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
Jacobi jac = new Jacobi(); |
|
|
|
forEachTransition(s, i, jac::accept); |
|
|
|
|
|
|
|
double d = jac.d; |
|
|
|
double diag = jac.diag; |
|
|
|
|
|
|
|
if (jac.onlySelfLoops) { |
|
|
|
if (d != 0) { |
|
|
|
d = (d > 0 ? Double.POSITIVE_INFINITY : Double.NEGATIVE_INFINITY); |
|
|
|
} else { |
|
|
|
// no reward & only self-loops: d remains 0 |
|
|
|
d = 0; |
|
|
|
} |
|
|
|
} else { |
|
|
|
// not only self-loops, do Jacobi division |
|
|
|
if (diag > 0) |
|
|
|
d /= diag; |
|
|
|
} |
|
|
|
|
|
|
|
return d; |
|
|
|
} |
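The onlySelfLoops case deserves a note: if the choice returns to s with probability 1, the Jacobi denominator 1 - P_i(s,s) is zero and plain division would yield NaN. Instead, a nonzero accumulated reward is mapped to plus or minus infinity (e.g. a pure self-loop with per-step reward 2 accumulates reward without bound), while zero reward stays zero.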
|
|
|
|
|
|
|
/** |
|
|
|
* Determine which choices result in min/max after a single row of matrix-vector multiplication and sum of rewards. |
|
|
|
@@ -465,7 +705,22 @@ public interface MDP extends NondetModel
|
|
|
* @param min Min or max (true=min, false=max) |
|
|
|
* @param val Min or max value to match |
|
|
|
*/ |
|
|
|
public List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val); |
|
|
|
public default List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val) |
|
|
|
{ |
|
|
|
// Create data structures to store strategy |
|
|
|
final List<Integer> result = new ArrayList<Integer>(); |
|
|
|
|
|
|
|
// One row of matrix-vector operation |
|
|
|
for (int choice = 0, numChoices = getNumChoices(s); choice < numChoices; choice++) { |
|
|
|
double d = mvMultRewSingle(s, choice, vect, mdpRewards); |
|
|
|
// Store strategy info if value matches |
|
|
|
if (PrismUtils.doublesAreClose(val, d, 1e-12, false)) { |
|
|
|
result.add(choice); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return result; |
|
|
|
} |
|
|
|
|
|
|
|
/** |
|
|
|
* Multiply the probability matrix induced by the MDP and {@code strat} |
|
|
|
@@ -480,6 +735,13 @@ public interface MDP extends NondetModel
|
|
|
* @param source Vector to multiply matrix with |
|
|
|
* @param dest Vector to write result to. |
|
|
|
*/ |
|
|
|
public void mvMultRight(int[] states, int[] strat, double[] source, double[] dest); |
|
|
|
public default void mvMultRight(int[] states, int[] strat, double[] source, double[] dest) |
|
|
|
{ |
|
|
|
for (int state : states) { |
|
|
|
forEachTransition(state, strat[state], (int s, int t, double prob) -> { |
|
|
|
dest[t] += prob * source[s]; |
|
|
|
}); |
|
|
|
} |
|
|
|
} |
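Since dest is accumulated into with +=, callers presumably zero it before each call. A minimal usage sketch, assuming mdp, strat and current are in scope:

int n = mdp.getNumStates();
int[] states = new int[n];
for (int s = 0; s < n; s++) {
	states[s] = s; // apply to every state
}
double[] next = new double[n]; // freshly allocated, so all zero
mdp.mvMultRight(states, strat, current, next);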
|
|
|
|
|
|
|
} |