Browse Source

Explicit engine improvements, mainly MDP rewards:

* Explicit engine gets MDP rewards (transition rewards only) from the model
* Rewards detached from MDPs (but attached ones still available, e.g. for A-R)
* Various bug fixes in MDPSparse, especially wrt rewards
* Few code tidies



git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@3215 bbc10eb1-c90d-0410-af57-cb519fbb1720
master
Dave Parker 15 years ago
parent
commit
85147f1a71
  1. 11
      prism/src/explicit/MDP.java
  2. 140
      prism/src/explicit/MDPModelChecker.java
  3. 18
      prism/src/explicit/MDPSimple.java
  4. 120
      prism/src/explicit/MDPSparse.java
  5. 60
      prism/src/explicit/ProbModelChecker.java
  6. 12
      prism/src/explicit/QuantAbstractRefine.java
  7. 43
      prism/src/explicit/rewards/MDPRewards.java
  8. 107
      prism/src/explicit/rewards/MDPRewardsSimple.java

11
prism/src/explicit/MDP.java

@ -29,6 +29,8 @@ package explicit;
import java.util.*;
import java.util.Map.Entry;
import explicit.rewards.*;
/**
* Interface for classes that provide (read) access to an explicit-state MDP.
*/
@ -157,32 +159,35 @@ public interface MDP extends Model
* Do a matrix-vector multiplication and sum of action reward followed by min/max, i.e. one step of value iteration.
* i.e. for all s: result[s] = min/max_k { rew(s) + sum_j P_k(s,j)*vect[j] }
* @param vect Vector to multiply by
* @param mdpRewards The rewards
* @param min Min or max for (true=min, false=max)
* @param result Vector to store result in
* @param subset Only do multiplication for these rows (ignored if null)
* @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
* @param adv Storage for adversary choice indices (ignored if null)
*/
public void mvMultRewMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[]);
public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[]);
/**
* Do a single row of matrix-vector multiplication and sum of action reward followed by min/max.
* i.e. return min/max_k { rew(s) + sum_j P_k(s,j)*vect[j] }
* @param s Row index
* @param vect Vector to multiply by
* @param mdpRewards The rewards
* @param min Min or max for (true=min, false=max)
* @param adv Storage for adversary choice indices (ignored if null)
*/
public double mvMultRewMinMaxSingle(int s, double vect[], boolean min, int adv[]);
public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[]);
/**
* Determine which choices result in min/max after a single row of matrix-vector multiplication and sum of action reward.
* @param s Row index
* @param vect Vector to multiply by
* @param mdpRewards The rewards
* @param min Min or max (true=min, false=max)
* @param val Min or max value to match
*/
public List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], boolean min, double val);
public List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val);
/**
* Multiply the probability matrix induced by the mdp and {@code adv}

140
prism/src/explicit/MDPModelChecker.java

@ -32,6 +32,8 @@ import parser.ast.Expression;
import parser.ast.ExpressionTemporal;
import java.util.Map.Entry;
import explicit.rewards.MDPRewards;
import prism.*;
/**
@ -156,7 +158,7 @@ public class MDPModelChecker extends ProbModelChecker
/**
* Compute rewards for the contents of an R operator.
*/
protected StateValues checkRewardFormula(Model model, Expression expr, boolean min) throws PrismException
protected StateValues checkRewardFormula(Model model, MDPRewards modelRewards, Expression expr, boolean min) throws PrismException
{
StateValues rewards = null;
@ -164,7 +166,7 @@ public class MDPModelChecker extends ProbModelChecker
ExpressionTemporal exprTemp = (ExpressionTemporal) expr;
switch (exprTemp.getOperator()) {
case ExpressionTemporal.R_F:
rewards = checkRewardReach(model, exprTemp, min);
rewards = checkRewardReach(model, modelRewards, exprTemp, min);
break;
default:
throw new PrismException("Explicit engine does not yet handle the " + exprTemp.getOperatorSymbol() + " operator in the R operator");
@ -180,7 +182,7 @@ public class MDPModelChecker extends ProbModelChecker
/**
* Compute rewards for a reachability reward operator.
*/
protected StateValues checkRewardReach(Model model, ExpressionTemporal expr, boolean min) throws PrismException
protected StateValues checkRewardReach(Model model, MDPRewards modelRewards, ExpressionTemporal expr, boolean min) throws PrismException
{
BitSet b;
StateValues rewards = null;
@ -193,8 +195,7 @@ public class MDPModelChecker extends ProbModelChecker
// mainLog.print("\nb = " + JDD.GetNumMintermsString(b1,
// allDDRowVars.n()));
//res = computeFracRewardsPolIter((MDP) model, null, null, min);
res = computeReachRewards((MDP) model, b, min);
res = computeReachRewards((MDP) model, modelRewards, b, min);
rewards = StateValues.createFromDoubleArray(res.soln);
return rewards;
@ -960,25 +961,27 @@ public class MDPModelChecker extends ProbModelChecker
/**
* Compute expected reachability rewards.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param target Target states
* @param min Min or max rewards (true=min, false=max)
*/
public ModelCheckerResult computeReachRewards(MDP mdp, BitSet target, boolean min) throws PrismException
public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min) throws PrismException
{
return computeReachRewards(mdp, target, min, null, null);
return computeReachRewards(mdp, mdpRewards, target, min, null, null);
}
/**
* Compute expected reachability rewards.
* i.e. compute the min/max reward accumulated to reach a state in {@code target}.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param target Target states
* @param min Min or max rewards (true=min, false=max)
* @param init Optionally, an initial solution vector (may be overwritten)
* @param known Optionally, a set of states for which the exact answer is known
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
public ModelCheckerResult computeReachRewards(MDP mdp, BitSet target, boolean min, double init[], BitSet known) throws PrismException
public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min, double init[], BitSet known) throws PrismException
{
ModelCheckerResult res = null;
BitSet inf;
@ -1018,7 +1021,7 @@ public class MDPModelChecker extends ProbModelChecker
// Compute rewards
switch (solnMethod) {
case VALUE_ITERATION:
res = computeReachRewardsValIter(mdp, target, inf, min, init, known);
res = computeReachRewardsValIter(mdp, mdpRewards, target, inf, min, init, known);
break;
default:
throw new PrismException("Unknown MDP solution method " + solnMethod);
@ -1038,6 +1041,7 @@ public class MDPModelChecker extends ProbModelChecker
/**
* Compute expected reachability rewards using value iteration.
* @param mdp The MDP
* @param mdpRewards The rewards
* @param target Target states
* @param inf States for which reward is infinite
* @param min Min or max rewards (true=min, false=max)
@ -1045,7 +1049,7 @@ public class MDPModelChecker extends ProbModelChecker
* @param known Optionally, a set of states for which the exact answer is known
* Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
*/
protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, BitSet target, BitSet inf, boolean min, double init[], BitSet known) throws PrismException
protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[], BitSet known) throws PrismException
{
ModelCheckerResult res;
BitSet unknown;
@ -1095,7 +1099,7 @@ public class MDPModelChecker extends ProbModelChecker
//mainLog.println(soln);
iters++;
// Matrix-vector multiply and min/max ops
mdp.mvMultRewMinMax(soln, min, soln2, unknown, false, null);
mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, unknown, false, null);
// Check termination
done = PrismUtils.doublesAreClose(soln, soln2, termCritParam, termCrit == TermCrit.ABSOLUTE);
// Swap vectors for next iter
@ -1122,124 +1126,18 @@ public class MDPModelChecker extends ProbModelChecker
* (More precisely, list of indices of choices resulting in min/max.)
* (Note: indices are guaranteed to be sorted in ascending order.)
* @param mdp The MDP
* @param mdpRewards The rewards
* @param state The state to generate strategy info for
* @param target The set of target states to reach
* @param min Min or max rewards (true=min, false=max)
* @param lastSoln Vector of values from which to recompute in one iteration
*/
public List<Integer> expReachStrategy(MDP mdp, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException
public List<Integer> expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException
{
double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, min, null);
return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, min, val);
double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, mdpRewards, min, null);
return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val);
}
/**
* Compute fractional rewards using policy iteration.
* @param mdp: The MDP
* @param min: Min or max probabilities (true=min, false=max)
*/
/*protected ModelCheckerResult computeFracRewardsPolIter(MDP mdp, BitSet no, BitSet yes, boolean min) throws PrismException
{
int n;
long timer;
int adv[];
final String acts = settings.getString(PrismSettings.PRISM_AR_OPTIONS);
final Map<String, Double> costMap = new HashMap<String, Double>();
final Map<String, Double> rewardMap = new HashMap<String, Double>();
for (String tuple : acts.split("\\s*,\\s*")) {
String[] nameAndValues = tuple.split("\\s*=\\s*");
assert(nameAndValues.length == 2);
String[] values = nameAndValues[1].split("/");
assert (values.length == 2);
final String name = "[" + nameAndValues[0] + "]";
costMap.put(name, Double.parseDouble(values[0]));
rewardMap.put(name, Double.parseDouble(values[1]));
}
final double costMatrix[][] = new double[mdp.getNumStates()][];
final double rewardMatrix[][] = new double[mdp.getNumStates()][];
for (int s = 0; s < mdp.getNumStates(); s++) {
costMatrix[s] = new double[mdp.getNumChoices(s)];
rewardMatrix[s] = new double[mdp.getNumChoices(s)];
for (int a = 0; a < mdp.getNumChoices(s); a++) {
String action = (String) mdp.getAction(s, a);
if (action != null && costMap.containsKey(action)) {
costMatrix[s][a] = costMap.get(action);
rewardMatrix[s][a] = rewardMap.get(action);
}
}
}
mainLog.println("Started end-component calculation");
timer = System.currentTimeMillis();
EndcomponentsOf eco = new EndcomponentsOf(mdp);
List<List<Integer>> ecs = eco.getEndComponents();
mainLog.println("Done with end-component calculation: " + (System.currentTimeMillis() - timer) / 1000.0);
mainLog.println(ecs.size() + " end-components.");
// Store num states
n = mdp.getNumStates();
// Generate initial adversary
adv = new int[n];
// Start policy iteration
timer = System.currentTimeMillis();
mainLog.println("Starting policy iteration (" + (min ? "min" : "max") + ")...");
// Optimize every end-component separately
for (List<Integer> ec : ecs) {
// Build the subMDP induced by the end-component
MDP subMDP = new MDPSparse(mdp, ec, eco.getAvailableActions());
// Build the cost- and reward matrices induced by the end-components
double[][] subCostMatrix = new double[ec.size()][];
double[][] subRewardMatrix = new double[ec.size()][];
for (int i = 0; i < ec.size(); i++) {
List<Integer> actions = eco.getAvailableActions().get(ec.get(i));
subCostMatrix[i] = new double[actions.size()];
subRewardMatrix[i] = new double[actions.size()];
for (int a = 0; a < actions.size(); a++) {
subCostMatrix[i][a] = costMatrix[ec.get(i)][actions.get(a)];
subRewardMatrix[i][a] = rewardMatrix[ec.get(i)][actions.get(a)];
}
}
// Actually optimize the components
RatioECOptimizer c = new RatioECOptimizer(subMDP, subCostMatrix, subRewardMatrix);
if (! c.isZero()) {
c.computeOptimalStrategy();
} else {
// We don't need to do anything because isZero already calculated
// a strategy for us
}
// reverse translate the strategy
int[] subAdv = c.getAdv();
for (int i = 0; i < ec.size(); i++) {
adv[ec.get(i)] = eco.getAvailableActions().get(ec.get(i)).get(subAdv[i]);
}
}
for (int s = 0; s < mdp.getNumStates(); s++) {
mainLog.println(mdp.getStatesList().get(s) + ": " + mdp.getAction(s, adv[s]));
mainLog.println(" (" + costMatrix[s][adv[s]] + ", " + rewardMatrix[s][adv[s]] + ")");
Iterator<Entry<Integer, Double>> it = mdp.getTransitionsIterator(s, adv[s]);
while (it.hasNext()) {
Entry<Integer, Double> next = it.next();
mainLog.println("\t -> " + next.getKey() + " (" + next.getValue() + ")");
}
}
// TODO: Compose strategy
System.out.println("DONE!!!");
System.exit(0);
return null;
}*/
/**
* Simple test program.
*/

18
prism/src/explicit/MDPSimple.java

@ -30,6 +30,8 @@ import java.util.*;
import java.util.Map.Entry;
import java.io.*;
import explicit.rewards.MDPRewards;
import prism.ModelType;
import prism.PrismException;
import prism.PrismUtils;
@ -853,24 +855,24 @@ public class MDPSimple extends ModelSimple implements MDP
}
@Override
public void mvMultRewMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[])
public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[])
{
int s;
// Loop depends on subset/complement arguments
if (subset == null) {
for (s = 0; s < numStates; s++)
result[s] = mvMultRewMinMaxSingle(s, vect, min, adv);
result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv);
} else if (complement) {
for (s = subset.nextClearBit(0); s < numStates; s = subset.nextClearBit(s + 1))
result[s] = mvMultRewMinMaxSingle(s, vect, min, adv);
result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv);
} else {
for (s = subset.nextSetBit(0); s >= 0; s = subset.nextSetBit(s + 1))
result[s] = mvMultRewMinMaxSingle(s, vect, min, adv);
result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv);
}
}
@Override
public double mvMultRewMinMaxSingle(int s, double vect[], boolean min, int adv[])
public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[])
{
int j, k;
double d, prob, minmax;
@ -886,7 +888,7 @@ public class MDPSimple extends ModelSimple implements MDP
for (Distribution distr : step) {
j++;
// Compute sum for this distribution
d = getTransitionReward(s, j);
d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j) : getTransitionReward(s, j);
for (Map.Entry<Integer, Double> e : distr) {
k = (Integer) e.getKey();
prob = (Double) e.getValue();
@ -902,7 +904,7 @@ public class MDPSimple extends ModelSimple implements MDP
}
@Override
public List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], boolean min, double val)
public List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val)
{
int j, k;
double d, prob;
@ -917,7 +919,7 @@ public class MDPSimple extends ModelSimple implements MDP
for (Distribution distr : step) {
j++;
// Compute sum for this distribution
d = getTransitionReward(s, j);
d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j) : getTransitionReward(s, j);
for (Map.Entry<Integer, Double> e : distr) {
k = (Integer) e.getKey();
prob = (Double) e.getValue();

120
prism/src/explicit/MDPSparse.java

@ -30,6 +30,8 @@ import java.util.*;
import java.util.Map.Entry;
import java.io.*;
import explicit.rewards.MDPRewards;
import prism.ModelType;
import prism.PrismException;
import prism.PrismUtils;
@ -54,9 +56,7 @@ public class MDPSparse extends ModelSparse implements MDP
protected int rowStarts[];
// Action labels
/**
* Array containing actions; is of size numDistrs
*/
/** Array of action labels for choices (is of size numDistrs) */
protected Object actions[];
// Rewards
@ -79,7 +79,7 @@ public class MDPSparse extends ModelSparse implements MDP
{
this(mdp, false);
}
/**
* Copy constructor for a (sub-)MDP from a given MDP.
* The states and actions will be indexed as given by the order
@ -89,25 +89,26 @@ public class MDPSparse extends ModelSparse implements MDP
* @param states States to copy
* @param actions Actions to copy
*/
public MDPSparse(MDP mdp, List<Integer> states, List<List<Integer>> actions) {
public MDPSparse(MDP mdp, List<Integer> states, List<List<Integer>> actions)
{
initialise(states.size());
for (int in : mdp.getInitialStates()) {
addInitialState(in);
}
// statesList = new ArrayList<State>(states.size());
statesList = new ArrayList<parser.State>();
for (int s : states) {
statesList.add(mdp.getStatesList().get(s));
}
numDistrs = 0;
numTransitions = 0;
maxNumDistrs = 0;
for (int i = 0; i < states.size(); i++) {
int s = states.get(i);
final int numChoices = actions.get(s).size();
numDistrs += numChoices;
if (numChoices > maxNumDistrs) {
@ -117,20 +118,20 @@ public class MDPSparse extends ModelSparse implements MDP
numTransitions += mdp.getNumTransitions(s, a);
}
}
nonZeros = new double[numTransitions];
cols = new int[numTransitions];
choiceStarts = new int[numDistrs+1];
rowStarts = new int[numStates+1];
this.actions = new Object[numDistrs + 1];
choiceStarts = new int[numDistrs + 1];
rowStarts = new int[numStates + 1];
this.actions = new Object[numDistrs];
int choiceIndex = 0;
int colIndex = 0;
int[] reverseStates = new int[mdp.getNumStates()];
for (int i = 0; i < states.size(); i++) {
reverseStates[states.get(i)] = i;
}
for (int i = 0; i < states.size(); i++) {
int s = states.get(i);
rowStarts[i] = choiceIndex;
@ -144,13 +145,13 @@ public class MDPSparse extends ModelSparse implements MDP
cols[colIndex] = reverseStates[next.getKey()];
nonZeros[colIndex] = next.getValue();
colIndex++;
}
}
}
}
}
choiceStarts[numDistrs] = numTransitions;
rowStarts[numStates] = numDistrs;
// TODO copy rewards
}
@ -181,12 +182,12 @@ public class MDPSparse extends ModelSparse implements MDP
cols = new int[numTransitions];
choiceStarts = new int[numDistrs + 1];
rowStarts = new int[numStates + 1];
actions = new Object[numDistrs + 1];
actions = new Object[numDistrs];
j = k = 0;
for (i = 0; i < numStates; i++) {
rowStarts[i] = j;
for (int l = 0; l < mdp.actions.get(i).size(); l++) {
actions[j + l] = mdp.actions.get(i).get(l);
actions[j + l] = mdp.actions.get(i).get(l);
}
for (Distribution distr : mdp.trans.get(i)) {
choiceStarts[j] = k;
@ -212,8 +213,8 @@ public class MDPSparse extends ModelSparse implements MDP
}
choiceStarts[numDistrs] = numTransitions;
rowStarts[numStates] = numDistrs;
// TODO: copy actions, rewards
// TODO: copy rewards
}
/**
@ -254,9 +255,13 @@ public class MDPSparse extends ModelSparse implements MDP
cols = new int[numTransitions];
choiceStarts = new int[numDistrs + 1];
rowStarts = new int[numStates + 1];
actions = new Object[numDistrs];
j = k = 0;
for (i = 0; i < numStates; i++) {
rowStarts[i] = j;
for (int l = 0; l < mdp.actions.get(permutInv[i]).size(); l++) {
actions[j + l] = mdp.actions.get(permutInv[i]).get(l);
}
for (Distribution distr : mdp.trans.get(permutInv[i])) {
choiceStarts[j] = k;
for (Map.Entry<Integer, Double> e : distr) {
@ -281,7 +286,8 @@ public class MDPSparse extends ModelSparse implements MDP
}
choiceStarts[numDistrs] = numTransitions;
rowStarts[numStates] = numDistrs;
// TODO: copy actions, rewards
// TODO: copy rewards
}
// Mutators (for ModelSparse)
@ -607,44 +613,52 @@ public class MDPSparse extends ModelSparse implements MDP
@Override
public Iterator<Entry<Integer, Double>> getTransitionsIterator(final int s, final int i)
{
return new Iterator<Entry<Integer, Double>> () {
return new Iterator<Entry<Integer, Double>>()
{
final int start = choiceStarts[rowStarts[s] + i];
int col = start;
final int end = choiceStarts[rowStarts[s] + i + 1];
@Override
public boolean hasNext() {
public boolean hasNext()
{
return col < end;
}
@Override
public Entry<Integer, Double> next() {
public Entry<Integer, Double> next()
{
assert (col < end);
final int i = col;
col++;
return new Entry<Integer, Double>() {
return new Entry<Integer, Double>()
{
int key = cols[i];
double value = nonZeros[i];
@Override
public Integer getKey() {
public Integer getKey()
{
return key;
}
@Override
public Double getValue() {
public Double getValue()
{
return value;
}
@Override
public Double setValue(Double arg0) {
public Double setValue(Double arg0)
{
throw new UnsupportedOperationException();
}
}
};
}
@Override
public void remove() {
public void remove()
{
throw new UnsupportedOperationException();
}
};
@ -653,8 +667,7 @@ public class MDPSparse extends ModelSparse implements MDP
@Override
public Object getAction(int s, int i)
{
// TODO
return null;
return actions[rowStarts[s] + i];
}
@Override
@ -950,38 +963,38 @@ public class MDPSparse extends ModelSparse implements MDP
}
@Override
public void mvMultRewMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[])
public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[])
{
int s;
// Loop depends on subset/complement arguments
if (subset == null) {
for (s = 0; s < numStates; s++)
result[s] = mvMultRewMinMaxSingle(s, vect, min, adv);
result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv);
} else if (complement) {
for (s = subset.nextClearBit(0); s < numStates; s = subset.nextClearBit(s + 1))
result[s] = mvMultRewMinMaxSingle(s, vect, min, adv);
result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv);
} else {
for (s = subset.nextSetBit(0); s >= 0; s = subset.nextSetBit(s + 1))
result[s] = mvMultRewMinMaxSingle(s, vect, min, adv);
result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv);
}
}
@Override
public double mvMultRewMinMaxSingle(int s, double vect[], boolean min, int adv[])
public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[])
{
int j, k, l1, h1, l2, h2;
double d, minmax;
boolean first;
// TODO: implement adv. gen.
minmax = 0;
first = true;
l1 = rowStarts[s];
h1 = rowStarts[s + 1];
for (j = l1; j < h1; j++) {
// Compute sum for this distribution
d = getTransitionReward(s, j);
d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j - l1) : getTransitionReward(s, j - l1);
l2 = choiceStarts[j];
h2 = choiceStarts[j + 1];
for (k = l2; k < h2; k++) {
@ -997,7 +1010,7 @@ public class MDPSparse extends ModelSparse implements MDP
}
@Override
public List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], boolean min, double val)
public List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val)
{
int j, k, l1, h1, l2, h2;
double d;
@ -1010,7 +1023,7 @@ public class MDPSparse extends ModelSparse implements MDP
h1 = rowStarts[s + 1];
for (j = l1; j < h1; j++) {
// Compute sum for this distribution
d = getTransitionReward(s, j);
d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j) : getTransitionReward(s, j);
l2 = choiceStarts[j];
h2 = choiceStarts[j + 1];
for (k = l2; k < h2; k++) {
@ -1031,6 +1044,7 @@ public class MDPSparse extends ModelSparse implements MDP
public String toString()
{
int i, j, k, l1, h1, l2, h2;
Object o;
String s = "";
s = "[ ";
for (i = 0; i < numStates; i++) {
@ -1040,7 +1054,12 @@ public class MDPSparse extends ModelSparse implements MDP
l1 = rowStarts[i];
h1 = rowStarts[i + 1];
for (j = l1; j < h1; j++) {
s += (j == l1) ? "{" : ", {";
if (j > l1)
s += ",";
o = actions[j];
if (o != null)
s += o + ":";
s += "{";
l2 = choiceStarts[j];
h2 = choiceStarts[j + 1];
for (k = l2; k < h2; k++) {
@ -1079,9 +1098,10 @@ public class MDPSparse extends ModelSparse implements MDP
// TODO: compare rewards (complicated: null = 0,0,0,0)*/
return true;
}
@Override
public void mvMultRight(int[] states, int[] adv, double[] source, double[] dest) {
public void mvMultRight(int[] states, int[] adv, double[] source, double[] dest)
{
for (int s : states) {
int j, l2, h2;
int k = adv[s];

60
prism/src/explicit/ProbModelChecker.java

@ -142,7 +142,8 @@ public class ProbModelChecker extends StateModelChecker
boolean min = false; // For nondeterministic models, are we finding min (true) or max (false) rewards
ModelType modelType = model.getModelType();
StateValues rews = null;
MCRewards modelRewards = null;
MCRewards mcRewards = null;
MDPRewards mdpRewards = null;
int i;
// Get info from reward operator
@ -194,7 +195,10 @@ public class ProbModelChecker extends StateModelChecker
switch (modelType) {
case CTMC:
case DTMC:
modelRewards = buildMCRewardStructure(model, rewStruct);
mcRewards = buildMCRewardStructure(model, rewStruct);
break;
case MDP:
mdpRewards = buildMDPRewardStructure((MDP) model, rewStruct);
break;
default:
throw new PrismException("Cannot build rewards for " + modelType + "s");
@ -204,13 +208,13 @@ public class ProbModelChecker extends StateModelChecker
mainLog.println("Building reward structure...");
switch (modelType) {
case CTMC:
rews = ((CTMCModelChecker) this).checkRewardFormula(model, modelRewards, expr.getExpression());
rews = ((CTMCModelChecker) this).checkRewardFormula(model, mcRewards, expr.getExpression());
break;
case DTMC:
rews = ((DTMCModelChecker) this).checkRewardFormula(model, modelRewards, expr.getExpression());
rews = ((DTMCModelChecker) this).checkRewardFormula(model, mcRewards, expr.getExpression());
break;
case MDP:
rews = ((MDPModelChecker) this).checkRewardFormula(model, expr.getExpression(), min);
rews = ((MDPModelChecker) this).checkRewardFormula(model, mdpRewards, expr.getExpression(), min);
break;
default:
throw new PrismException("Cannot model check " + expr + " for " + modelType + "s");
@ -237,7 +241,7 @@ public class ProbModelChecker extends StateModelChecker
case CTMC:
case DTMC:
if (rewStr.getNumTransItems() > 0) {
throw new PrismException("Explicit engine does not yet handle transition rewards");
throw new PrismException("Explicit engine does not yet handle transition rewards for D/CTMCs");
}
// Special case: constant rewards
if (rewStr.getNumStateItems() == 1 && Expression.isTrue(rewStr.getStates(0)) && rewStr.getReward(0).isConstant()) {
@ -245,9 +249,9 @@ public class ProbModelChecker extends StateModelChecker
}
// Normal: state rewards
else {
MCRewardsStateArray rewSA = new MCRewardsStateArray(model.getNumStates());
numStates = model.getNumStates();
statesList = model.getStatesList();
MCRewardsStateArray rewSA = new MCRewardsStateArray(numStates);
n = rewStr.getNumItems();
for (i = 0; i < n; i++) {
guard = rewStr.getStates(i);
@ -264,4 +268,46 @@ public class ProbModelChecker extends StateModelChecker
throw new PrismException("Cannot build rewards for " + model.getModelType() + "s");
}
}
/**
 * Build explicit-state rewards for an MDP from a reward structure of the model.
 * Currently only transition (action) rewards are handled; any state reward items
 * in the reward structure cause an exception.
 * @param mdp The MDP the rewards are for
 * @param rewStr The reward structure (guard/action/reward-expression items)
 * @return The rewards, as an MDPRewardsSimple object
 * @throws PrismException if the reward structure contains state reward items
 */
private MDPRewards buildMDPRewardStructure(MDP mdp, RewardStruct rewStr) throws PrismException
{
//MCRewards modelRewards = null;
List<State> statesList;
Expression guard;
String action;
Object mdpAction;
int i, j, k, n, numStates, numChoices;
// State rewards not yet supported for MDPs in the explicit engine
if (rewStr.getNumStateItems() > 0) {
throw new PrismException("Explicit engine does not yet handle state rewards for MDPs");
}
// Special case: constant rewards
// TODO
/*if (rewStr.getNumStateItems() == 1 && Expression.isTrue(rewStr.getStates(0)) && rewStr.getReward(0).isConstant()) {
return new MCRewardsStateConstant(rewStr.getReward(0).evaluateDouble());
}*/
// Normal: transition rewards
else {
numStates = mdp.getNumStates();
statesList = mdp.getStatesList();
MDPRewardsSimple rewSimple = new MDPRewardsSimple(numStates);
n = rewStr.getNumItems();
// For each (guard, action, reward) item of the reward structure...
for (i = 0; i < n; i++) {
guard = rewStr.getStates(i);
action = rewStr.getSynch(i);
// ...find every state satisfying the guard...
for (j = 0; j < numStates; j++) {
if (guard.evaluateBoolean(statesList.get(j))) {
numChoices = mdp.getNumChoices(j);
// ...and attach the (state-dependent) reward value to every choice of that
// state whose action label matches; a null item action matches only choices
// with a null (i.e. absent) action label
for (k = 0; k < numChoices; k++) {
mdpAction = mdp.getAction(j, k);
if (mdpAction == null ? (action == null) : mdpAction.equals(action)) {
rewSimple.setTransitionReward(j, k, rewStr.getReward(i).evaluateDouble(statesList.get(j)));
}
}
}
}
}
return rewSimple;
}
}
}

12
prism/src/explicit/QuantAbstractRefine.java

@ -801,9 +801,9 @@ public abstract class QuantAbstractRefine
switch (abstractionType) {
case MDP:
if (optimise && refinementNum > 0) {
res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, true, lbSoln, known);
res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, true, lbSoln, known);
} else {
res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, true, null, null);
res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, true, null, null);
}
break;
default:
@ -821,9 +821,9 @@ public abstract class QuantAbstractRefine
case MDP:
if (optimise) {
double lbCopy[] = Utils.cloneDoubleArray(lbSoln);
res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, false, lbCopy, known);
res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, false, lbCopy, known);
} else {
res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, false, null, null);
res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, false, null, null);
}
break;
default:
@ -1021,9 +1021,9 @@ public abstract class QuantAbstractRefine
ubLastSoln);
break;
case EXP_REACH:
lbStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, refineState, target, true,
lbStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, null, refineState, target, true,
lbLastSoln);
ubStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, refineState, target, false,
ubStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, null, refineState, target, false,
ubLastSoln);
break;
}

43
prism/src/explicit/rewards/MDPRewards.java

@ -0,0 +1,43 @@
//==============================================================================
//
// Copyright (c) 2002-
// Authors:
// * Dave Parker <david.parker@comlab.ox.ac.uk> (University of Oxford)
//
//------------------------------------------------------------------------------
//
// This file is part of PRISM.
//
// PRISM is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// PRISM is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with PRISM; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//==============================================================================
package explicit.rewards;
/**
 * Base class for classes that provide (read) access to explicit-state rewards
 * for an MDP, i.e. rewards attached to states and to the choices of states.
 */
public abstract class MDPRewards
{
/**
* Get the state reward for state {@code s}.
*/
public abstract double getStateReward(int s);
/**
* Get the transition reward for the {@code i}th choice from state {@code s}.
* ({@code i} indexes choices locally within state {@code s}, starting from 0.)
*/
public abstract double getTransitionReward(int s, int i);
}

107
prism/src/explicit/rewards/MDPRewardsSimple.java

@ -0,0 +1,107 @@
//==============================================================================
//
// Copyright (c) 2002-
// Authors:
// * Dave Parker <david.parker@comlab.ox.ac.uk> (University of Oxford)
//
//------------------------------------------------------------------------------
//
// This file is part of PRISM.
//
// PRISM is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// PRISM is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with PRISM; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//==============================================================================
package explicit.rewards;
import java.util.ArrayList;
import java.util.List;
/**
* Simple explicit-state storage of rewards for an MDP.
* Like the related class MDPSimple, this is not especially efficient, but mutable (in terms of size).
*/
public class MDPRewardsSimple extends MDPRewards
{
	/** Number of states */
	protected int numStates;
	/** State rewards (a null list denotes all-zero state rewards) **/
	protected List<Double> stateRewards;
	/** Transition rewards (a null list, at either level, denotes all-zero rewards) **/
	protected List<List<Double>> transRewards;

	/**
	 * Constructor: all zero rewards.
	 * @param numStates Number of states
	 */
	public MDPRewardsSimple(int numStates)
	{
		this.numStates = numStates;
		// Initially lists are just null (denoting all 0)
		stateRewards = null;
		transRewards = null;
	}

	// Mutators

	/**
	 * Set the reward for choice {@code i} of state {@code s} to {@code r}.
	 * Storage is created/extended lazily, so any (s,i) pair never set explicitly
	 * keeps reward 0.
	 */
	public void setTransitionReward(int s, int i, double r)
	{
		List<Double> list;
		// If no rewards array created yet, create it
		if (transRewards == null) {
			transRewards = new ArrayList<List<Double>>(numStates);
			for (int j = 0; j < numStates; j++)
				transRewards.add(null);
		}
		// If no rewards for state s yet, create list
		if (transRewards.get(s) == null) {
			list = new ArrayList<Double>();
			transRewards.set(s, list);
		} else {
			list = transRewards.get(s);
		}
		// If list not big enough, extend, padding with zero rewards
		while (list.size() <= i) {
			list.add(0.0);
		}
		// Set reward
		list.set(i, r);
	}

	// Accessors (for MDPRewards)

	@Override
	public double getStateReward(int s)
	{
		// null denotes all-zero state rewards
		// (was an NPE previously: no mutator for state rewards exists yet,
		// so stateRewards is always null)
		if (stateRewards == null)
			return 0.0;
		return stateRewards.get(s);
	}

	@Override
	public double getTransitionReward(int s, int i)
	{
		List<Double> list;
		// Missing storage (at either level) denotes a zero reward
		if (transRewards == null || (list = transRewards.get(s)) == null)
			return 0.0;
		if (list.size() <= i)
			return 0.0;
		return list.get(i);
	}
}
Loading…
Cancel
Save