From 85147f1a71543fa3d23041fc256debf4888c67f5 Mon Sep 17 00:00:00 2001 From: Dave Parker Date: Tue, 5 Jul 2011 22:37:02 +0000 Subject: [PATCH] Explicit engine improvements, mainly MDP rewards: * Explicit engine gets MDP rewards (transition rewards only) from the model * Rewards detached from MDPs (but attached ones still available, e.g. for A-R) * Various bug fixes in MDPSparse, especially wrt rewards * Few code tidies git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@3215 bbc10eb1-c90d-0410-af57-cb519fbb1720 --- prism/src/explicit/MDP.java | 11 +- prism/src/explicit/MDPModelChecker.java | 140 +++--------------- prism/src/explicit/MDPSimple.java | 18 ++- prism/src/explicit/MDPSparse.java | 120 ++++++++------- prism/src/explicit/ProbModelChecker.java | 60 +++++++- prism/src/explicit/QuantAbstractRefine.java | 12 +- prism/src/explicit/rewards/MDPRewards.java | 43 ++++++ .../explicit/rewards/MDPRewardsSimple.java | 107 +++++++++++++ 8 files changed, 316 insertions(+), 195 deletions(-) create mode 100644 prism/src/explicit/rewards/MDPRewards.java create mode 100644 prism/src/explicit/rewards/MDPRewardsSimple.java diff --git a/prism/src/explicit/MDP.java b/prism/src/explicit/MDP.java index 72a3ee03..0cee2140 100644 --- a/prism/src/explicit/MDP.java +++ b/prism/src/explicit/MDP.java @@ -29,6 +29,8 @@ package explicit; import java.util.*; import java.util.Map.Entry; +import explicit.rewards.*; + /** * Interface for classes that provide (read) access to an explicit-state MDP. */ @@ -157,32 +159,35 @@ public interface MDP extends Model * Do a matrix-vector multiplication and sum of action reward followed by min/max, i.e. one step of value iteration. * i.e. 
for all s: result[s] = min/max_k { rew(s) + sum_j P_k(s,j)*vect[j] } * @param vect Vector to multiply by + * @param mdpRewards The rewards * @param min Min or max for (true=min, false=max) * @param result Vector to store result in * @param subset Only do multiplication for these rows (ignored if null) * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null) * @param adv Storage for adversary choice indices (ignored if null) */ - public void mvMultRewMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[]); + public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[]); /** * Do a single row of matrix-vector multiplication and sum of action reward followed by min/max. * i.e. return min/max_k { rew(s) + sum_j P_k(s,j)*vect[j] } * @param s Row index * @param vect Vector to multiply by + * @param mdpRewards The rewards * @param min Min or max for (true=min, false=max) * @param adv Storage for adversary choice indices (ignored if null) */ - public double mvMultRewMinMaxSingle(int s, double vect[], boolean min, int adv[]); + public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[]); /** * Determine which choices result in min/max after a single row of matrix-vector multiplication and sum of action reward. 
* @param s Row index * @param vect Vector to multiply by + * @param mdpRewards The rewards * @param min Min or max (true=min, false=max) * @param val Min or max value to match */ - public List mvMultRewMinMaxSingleChoices(int s, double vect[], boolean min, double val); + public List mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val); /** * Multiply the probability matrix induced by the mdp and {@code adv} diff --git a/prism/src/explicit/MDPModelChecker.java b/prism/src/explicit/MDPModelChecker.java index 64acf8be..d4d78687 100644 --- a/prism/src/explicit/MDPModelChecker.java +++ b/prism/src/explicit/MDPModelChecker.java @@ -32,6 +32,8 @@ import parser.ast.Expression; import parser.ast.ExpressionTemporal; import java.util.Map.Entry; +import explicit.rewards.MDPRewards; + import prism.*; /** @@ -156,7 +158,7 @@ public class MDPModelChecker extends ProbModelChecker /** * Compute rewards for the contents of an R operator. */ - protected StateValues checkRewardFormula(Model model, Expression expr, boolean min) throws PrismException + protected StateValues checkRewardFormula(Model model, MDPRewards modelRewards, Expression expr, boolean min) throws PrismException { StateValues rewards = null; @@ -164,7 +166,7 @@ public class MDPModelChecker extends ProbModelChecker ExpressionTemporal exprTemp = (ExpressionTemporal) expr; switch (exprTemp.getOperator()) { case ExpressionTemporal.R_F: - rewards = checkRewardReach(model, exprTemp, min); + rewards = checkRewardReach(model, modelRewards, exprTemp, min); break; default: throw new PrismException("Explicit engine does not yet handle the " + exprTemp.getOperatorSymbol() + " operator in the R operator"); @@ -180,7 +182,7 @@ public class MDPModelChecker extends ProbModelChecker /** * Compute rewards for a reachability reward operator. 
*/ - protected StateValues checkRewardReach(Model model, ExpressionTemporal expr, boolean min) throws PrismException + protected StateValues checkRewardReach(Model model, MDPRewards modelRewards, ExpressionTemporal expr, boolean min) throws PrismException { BitSet b; StateValues rewards = null; @@ -193,8 +195,7 @@ public class MDPModelChecker extends ProbModelChecker // mainLog.print("\nb = " + JDD.GetNumMintermsString(b1, // allDDRowVars.n())); - //res = computeFracRewardsPolIter((MDP) model, null, null, min); - res = computeReachRewards((MDP) model, b, min); + res = computeReachRewards((MDP) model, modelRewards, b, min); rewards = StateValues.createFromDoubleArray(res.soln); return rewards; @@ -960,25 +961,27 @@ public class MDPModelChecker extends ProbModelChecker /** * Compute expected reachability rewards. * @param mdp The MDP + * @param mdpRewards The rewards * @param target Target states * @param min Min or max rewards (true=min, false=max) */ - public ModelCheckerResult computeReachRewards(MDP mdp, BitSet target, boolean min) throws PrismException + public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min) throws PrismException { - return computeReachRewards(mdp, target, min, null, null); + return computeReachRewards(mdp, mdpRewards, target, min, null, null); } /** * Compute expected reachability rewards. * i.e. compute the min/max reward accumulated to reach a state in {@code target}. * @param mdp The MDP + * @param mdpRewards The rewards * @param target Target states * @param min Min or max rewards (true=min, false=max) * @param init Optionally, an initial solution vector (may be overwritten) * @param known Optionally, a set of states for which the exact answer is known * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values. 
*/ - public ModelCheckerResult computeReachRewards(MDP mdp, BitSet target, boolean min, double init[], BitSet known) throws PrismException + public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min, double init[], BitSet known) throws PrismException { ModelCheckerResult res = null; BitSet inf; @@ -1018,7 +1021,7 @@ public class MDPModelChecker extends ProbModelChecker // Compute rewards switch (solnMethod) { case VALUE_ITERATION: - res = computeReachRewardsValIter(mdp, target, inf, min, init, known); + res = computeReachRewardsValIter(mdp, mdpRewards, target, inf, min, init, known); break; default: throw new PrismException("Unknown MDP solution method " + solnMethod); @@ -1038,6 +1041,7 @@ public class MDPModelChecker extends ProbModelChecker /** * Compute expected reachability rewards using value iteration. * @param mdp The MDP + * @param mdpRewards The rewards * @param target Target states * @param inf States for which reward is infinite * @param min Min or max rewards (true=min, false=max) @@ -1045,7 +1049,7 @@ public class MDPModelChecker extends ProbModelChecker * @param known Optionally, a set of states for which the exact answer is known * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values. 
*/ - protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, BitSet target, BitSet inf, boolean min, double init[], BitSet known) throws PrismException + protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[], BitSet known) throws PrismException { ModelCheckerResult res; BitSet unknown; @@ -1095,7 +1099,7 @@ public class MDPModelChecker extends ProbModelChecker //mainLog.println(soln); iters++; // Matrix-vector multiply and min/max ops - mdp.mvMultRewMinMax(soln, min, soln2, unknown, false, null); + mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, unknown, false, null); // Check termination done = PrismUtils.doublesAreClose(soln, soln2, termCritParam, termCrit == TermCrit.ABSOLUTE); // Swap vectors for next iter @@ -1122,124 +1126,18 @@ public class MDPModelChecker extends ProbModelChecker * (More precisely, list of indices of choices resulting in min/max.) * (Note: indices are guaranteed to be sorted in ascending order.) * @param mdp The MDP + * @param mdpRewards The rewards * @param state The state to generate strategy info for * @param target The set of target states to reach * @param min Min or max rewards (true=min, false=max) * @param lastSoln Vector of values from which to recompute in one iteration */ - public List expReachStrategy(MDP mdp, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException + public List expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException { - double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, min, null); - return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, min, val); + double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, mdpRewards, min, null); + return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val); } - /** - * Compute fractional rewards using policy iteration. 
- * @param mdp: The MDP - * @param min: Min or max probabilities (true=min, false=max) - */ - /*protected ModelCheckerResult computeFracRewardsPolIter(MDP mdp, BitSet no, BitSet yes, boolean min) throws PrismException - { - int n; - long timer; - int adv[]; - final String acts = settings.getString(PrismSettings.PRISM_AR_OPTIONS); - - final Map costMap = new HashMap(); - final Map rewardMap = new HashMap(); - for (String tuple : acts.split("\\s*,\\s*")) { - String[] nameAndValues = tuple.split("\\s*=\\s*"); - assert(nameAndValues.length == 2); - String[] values = nameAndValues[1].split("/"); - assert (values.length == 2); - final String name = "[" + nameAndValues[0] + "]"; - costMap.put(name, Double.parseDouble(values[0])); - rewardMap.put(name, Double.parseDouble(values[1])); - } - - final double costMatrix[][] = new double[mdp.getNumStates()][]; - final double rewardMatrix[][] = new double[mdp.getNumStates()][]; - for (int s = 0; s < mdp.getNumStates(); s++) { - costMatrix[s] = new double[mdp.getNumChoices(s)]; - rewardMatrix[s] = new double[mdp.getNumChoices(s)]; - - for (int a = 0; a < mdp.getNumChoices(s); a++) { - String action = (String) mdp.getAction(s, a); - if (action != null && costMap.containsKey(action)) { - costMatrix[s][a] = costMap.get(action); - rewardMatrix[s][a] = rewardMap.get(action); - } - } - } - - mainLog.println("Started end-component calculation"); - timer = System.currentTimeMillis(); - EndcomponentsOf eco = new EndcomponentsOf(mdp); - List> ecs = eco.getEndComponents(); - mainLog.println("Done with end-component calculation: " + (System.currentTimeMillis() - timer) / 1000.0); - mainLog.println(ecs.size() + " end-components."); - - // Store num states - n = mdp.getNumStates(); - - // Generate initial adversary - adv = new int[n]; - - // Start policy iteration - timer = System.currentTimeMillis(); - mainLog.println("Starting policy iteration (" + (min ? 
"min" : "max") + ")..."); - - // Optimize every end-component separately - for (List ec : ecs) { - // Build the subMDP induced by the end-component - MDP subMDP = new MDPSparse(mdp, ec, eco.getAvailableActions()); - - // Build the cost- and reward matrices induced by the end-components - double[][] subCostMatrix = new double[ec.size()][]; - double[][] subRewardMatrix = new double[ec.size()][]; - for (int i = 0; i < ec.size(); i++) { - List actions = eco.getAvailableActions().get(ec.get(i)); - subCostMatrix[i] = new double[actions.size()]; - subRewardMatrix[i] = new double[actions.size()]; - for (int a = 0; a < actions.size(); a++) { - subCostMatrix[i][a] = costMatrix[ec.get(i)][actions.get(a)]; - subRewardMatrix[i][a] = rewardMatrix[ec.get(i)][actions.get(a)]; - } - } - - // Actually optimize the components - RatioECOptimizer c = new RatioECOptimizer(subMDP, subCostMatrix, subRewardMatrix); - if (! c.isZero()) { - c.computeOptimalStrategy(); - } else { - // We don't need to do anything because isZero already calculated - // a strategy for us - } - // reverse translate the strategy - int[] subAdv = c.getAdv(); - for (int i = 0; i < ec.size(); i++) { - adv[ec.get(i)] = eco.getAvailableActions().get(ec.get(i)).get(subAdv[i]); - } - } - - for (int s = 0; s < mdp.getNumStates(); s++) { - mainLog.println(mdp.getStatesList().get(s) + ": " + mdp.getAction(s, adv[s])); - mainLog.println(" (" + costMatrix[s][adv[s]] + ", " + rewardMatrix[s][adv[s]] + ")"); - Iterator> it = mdp.getTransitionsIterator(s, adv[s]); - while (it.hasNext()) { - Entry next = it.next(); - mainLog.println("\t -> " + next.getKey() + " (" + next.getValue() + ")"); - } - } - - // TODO: Compose strategy - - System.out.println("DONE!!!"); - System.exit(0); - - return null; - }*/ - /** * Simple test program. 
*/ diff --git a/prism/src/explicit/MDPSimple.java b/prism/src/explicit/MDPSimple.java index bd5519a8..20b20421 100644 --- a/prism/src/explicit/MDPSimple.java +++ b/prism/src/explicit/MDPSimple.java @@ -30,6 +30,8 @@ import java.util.*; import java.util.Map.Entry; import java.io.*; +import explicit.rewards.MDPRewards; + import prism.ModelType; import prism.PrismException; import prism.PrismUtils; @@ -853,24 +855,24 @@ public class MDPSimple extends ModelSimple implements MDP } @Override - public void mvMultRewMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[]) + public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[]) { int s; // Loop depends on subset/complement arguments if (subset == null) { for (s = 0; s < numStates; s++) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } else if (complement) { for (s = subset.nextClearBit(0); s < numStates; s = subset.nextClearBit(s + 1)) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } else { for (s = subset.nextSetBit(0); s >= 0; s = subset.nextSetBit(s + 1)) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } } @Override - public double mvMultRewMinMaxSingle(int s, double vect[], boolean min, int adv[]) + public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[]) { int j, k; double d, prob, minmax; @@ -886,7 +888,7 @@ public class MDPSimple extends ModelSimple implements MDP for (Distribution distr : step) { j++; // Compute sum for this distribution - d = getTransitionReward(s, j); + d = mdpRewards != null ? 
mdpRewards.getTransitionReward(s, j) : getTransitionReward(s, j); for (Map.Entry e : distr) { k = (Integer) e.getKey(); prob = (Double) e.getValue(); @@ -902,7 +904,7 @@ public class MDPSimple extends ModelSimple implements MDP } @Override - public List mvMultRewMinMaxSingleChoices(int s, double vect[], boolean min, double val) + public List mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val) { int j, k; double d, prob; @@ -917,7 +919,7 @@ public class MDPSimple extends ModelSimple implements MDP for (Distribution distr : step) { j++; // Compute sum for this distribution - d = getTransitionReward(s, j); + d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j) : getTransitionReward(s, j); for (Map.Entry e : distr) { k = (Integer) e.getKey(); prob = (Double) e.getValue(); diff --git a/prism/src/explicit/MDPSparse.java b/prism/src/explicit/MDPSparse.java index 9646051f..7a4b065d 100644 --- a/prism/src/explicit/MDPSparse.java +++ b/prism/src/explicit/MDPSparse.java @@ -30,6 +30,8 @@ import java.util.*; import java.util.Map.Entry; import java.io.*; +import explicit.rewards.MDPRewards; + import prism.ModelType; import prism.PrismException; import prism.PrismUtils; @@ -54,9 +56,7 @@ public class MDPSparse extends ModelSparse implements MDP protected int rowStarts[]; // Action labels - /** - * Array containing actions; is of size numDistrs - */ + /** Array of action labels for choices (is of size numDistrs) */ protected Object actions[]; // Rewards @@ -79,7 +79,7 @@ public class MDPSparse extends ModelSparse implements MDP { this(mdp, false); } - + /** * Copy constructor for a (sub-)MDP from a given MDP. 
* The states and actions will be indexed as given by the order @@ -89,25 +89,26 @@ public class MDPSparse extends ModelSparse implements MDP * @param states States to copy * @param actions Actions to copy */ - public MDPSparse(MDP mdp, List states, List> actions) { + public MDPSparse(MDP mdp, List states, List> actions) + { initialise(states.size()); for (int in : mdp.getInitialStates()) { addInitialState(in); } - + // statesList = new ArrayList(states.size()); statesList = new ArrayList(); for (int s : states) { statesList.add(mdp.getStatesList().get(s)); } - + numDistrs = 0; numTransitions = 0; maxNumDistrs = 0; - + for (int i = 0; i < states.size(); i++) { int s = states.get(i); - + final int numChoices = actions.get(s).size(); numDistrs += numChoices; if (numChoices > maxNumDistrs) { @@ -117,20 +118,20 @@ public class MDPSparse extends ModelSparse implements MDP numTransitions += mdp.getNumTransitions(s, a); } } - + nonZeros = new double[numTransitions]; cols = new int[numTransitions]; - choiceStarts = new int[numDistrs+1]; - rowStarts = new int[numStates+1]; - this.actions = new Object[numDistrs + 1]; + choiceStarts = new int[numDistrs + 1]; + rowStarts = new int[numStates + 1]; + this.actions = new Object[numDistrs]; int choiceIndex = 0; int colIndex = 0; - + int[] reverseStates = new int[mdp.getNumStates()]; for (int i = 0; i < states.size(); i++) { reverseStates[states.get(i)] = i; } - + for (int i = 0; i < states.size(); i++) { int s = states.get(i); rowStarts[i] = choiceIndex; @@ -144,13 +145,13 @@ public class MDPSparse extends ModelSparse implements MDP cols[colIndex] = reverseStates[next.getKey()]; nonZeros[colIndex] = next.getValue(); colIndex++; - } - } + } + } } - + choiceStarts[numDistrs] = numTransitions; rowStarts[numStates] = numDistrs; - + // TODO copy rewards } @@ -181,12 +182,12 @@ public class MDPSparse extends ModelSparse implements MDP cols = new int[numTransitions]; choiceStarts = new int[numDistrs + 1]; rowStarts = new int[numStates + 
1]; - actions = new Object[numDistrs + 1]; + actions = new Object[numDistrs]; j = k = 0; for (i = 0; i < numStates; i++) { rowStarts[i] = j; for (int l = 0; l < mdp.actions.get(i).size(); l++) { - actions[j + l] = mdp.actions.get(i).get(l); + actions[j + l] = mdp.actions.get(i).get(l); } for (Distribution distr : mdp.trans.get(i)) { choiceStarts[j] = k; @@ -212,8 +213,8 @@ public class MDPSparse extends ModelSparse implements MDP } choiceStarts[numDistrs] = numTransitions; rowStarts[numStates] = numDistrs; - - // TODO: copy actions, rewards + + // TODO: copy rewards } /** @@ -254,9 +255,13 @@ public class MDPSparse extends ModelSparse implements MDP cols = new int[numTransitions]; choiceStarts = new int[numDistrs + 1]; rowStarts = new int[numStates + 1]; + actions = new Object[numDistrs]; j = k = 0; for (i = 0; i < numStates; i++) { rowStarts[i] = j; + for (int l = 0; l < mdp.actions.get(permutInv[i]).size(); l++) { + actions[j + l] = mdp.actions.get(permutInv[i]).get(l); + } for (Distribution distr : mdp.trans.get(permutInv[i])) { choiceStarts[j] = k; for (Map.Entry e : distr) { @@ -281,7 +286,8 @@ public class MDPSparse extends ModelSparse implements MDP } choiceStarts[numDistrs] = numTransitions; rowStarts[numStates] = numDistrs; - // TODO: copy actions, rewards + + // TODO: copy rewards } // Mutators (for ModelSparse) @@ -607,44 +613,52 @@ public class MDPSparse extends ModelSparse implements MDP @Override public Iterator> getTransitionsIterator(final int s, final int i) { - return new Iterator> () { + return new Iterator>() + { final int start = choiceStarts[rowStarts[s] + i]; int col = start; final int end = choiceStarts[rowStarts[s] + i + 1]; - + @Override - public boolean hasNext() { + public boolean hasNext() + { return col < end; } @Override - public Entry next() { + public Entry next() + { assert (col < end); final int i = col; col++; - return new Entry() { + return new Entry() + { int key = cols[i]; double value = nonZeros[i]; - + @Override - public 
Integer getKey() { + public Integer getKey() + { return key; } @Override - public Double getValue() { + public Double getValue() + { return value; } @Override - public Double setValue(Double arg0) { + public Double setValue(Double arg0) + { throw new UnsupportedOperationException(); - } + } }; } @Override - public void remove() { + public void remove() + { throw new UnsupportedOperationException(); } }; @@ -653,8 +667,7 @@ public class MDPSparse extends ModelSparse implements MDP @Override public Object getAction(int s, int i) { - // TODO - return null; + return actions[rowStarts[s] + i]; } @Override @@ -950,38 +963,38 @@ public class MDPSparse extends ModelSparse implements MDP } @Override - public void mvMultRewMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[]) + public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[]) { int s; // Loop depends on subset/complement arguments if (subset == null) { for (s = 0; s < numStates; s++) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } else if (complement) { for (s = subset.nextClearBit(0); s < numStates; s = subset.nextClearBit(s + 1)) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } else { for (s = subset.nextSetBit(0); s >= 0; s = subset.nextSetBit(s + 1)) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } } @Override - public double mvMultRewMinMaxSingle(int s, double vect[], boolean min, int adv[]) + public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[]) { int j, k, l1, h1, l2, h2; double d, minmax; boolean first; - + // TODO: implement adv. gen. 
- + minmax = 0; first = true; l1 = rowStarts[s]; h1 = rowStarts[s + 1]; for (j = l1; j < h1; j++) { // Compute sum for this distribution - d = getTransitionReward(s, j); + d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j - l1) : getTransitionReward(s, j - l1); l2 = choiceStarts[j]; h2 = choiceStarts[j + 1]; for (k = l2; k < h2; k++) { @@ -997,7 +1010,7 @@ public class MDPSparse extends ModelSparse implements MDP } @Override - public List mvMultRewMinMaxSingleChoices(int s, double vect[], boolean min, double val) + public List mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val) { int j, k, l1, h1, l2, h2; double d; @@ -1010,7 +1023,7 @@ public class MDPSparse extends ModelSparse implements MDP h1 = rowStarts[s + 1]; for (j = l1; j < h1; j++) { // Compute sum for this distribution - d = getTransitionReward(s, j); + d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j) : getTransitionReward(s, j); l2 = choiceStarts[j]; h2 = choiceStarts[j + 1]; for (k = l2; k < h2; k++) { @@ -1031,6 +1044,7 @@ public class MDPSparse extends ModelSparse implements MDP public String toString() { int i, j, k, l1, h1, l2, h2; + Object o; String s = ""; s = "[ "; for (i = 0; i < numStates; i++) { @@ -1040,7 +1054,12 @@ public class MDPSparse extends ModelSparse implements MDP l1 = rowStarts[i]; h1 = rowStarts[i + 1]; for (j = l1; j < h1; j++) { - s += (j == l1) ? 
"{" : ", {"; + if (j > l1) + s += ","; + o = actions[j]; + if (o != null) + s += o + ":"; + s += "{"; l2 = choiceStarts[j]; h2 = choiceStarts[j + 1]; for (k = l2; k < h2; k++) { @@ -1079,9 +1098,10 @@ public class MDPSparse extends ModelSparse implements MDP // TODO: compare rewards (complicated: null = 0,0,0,0)*/ return true; } - + @Override - public void mvMultRight(int[] states, int[] adv, double[] source, double[] dest) { + public void mvMultRight(int[] states, int[] adv, double[] source, double[] dest) + { for (int s : states) { int j, l2, h2; int k = adv[s]; diff --git a/prism/src/explicit/ProbModelChecker.java b/prism/src/explicit/ProbModelChecker.java index 4909381b..e279298b 100644 --- a/prism/src/explicit/ProbModelChecker.java +++ b/prism/src/explicit/ProbModelChecker.java @@ -142,7 +142,8 @@ public class ProbModelChecker extends StateModelChecker boolean min = false; // For nondeterministic models, are we finding min (true) or max (false) rewards ModelType modelType = model.getModelType(); StateValues rews = null; - MCRewards modelRewards = null; + MCRewards mcRewards = null; + MDPRewards mdpRewards = null; int i; // Get info from reward operator @@ -194,7 +195,10 @@ public class ProbModelChecker extends StateModelChecker switch (modelType) { case CTMC: case DTMC: - modelRewards = buildMCRewardStructure(model, rewStruct); + mcRewards = buildMCRewardStructure(model, rewStruct); + break; + case MDP: + mdpRewards = buildMDPRewardStructure((MDP) model, rewStruct); break; default: throw new PrismException("Cannot build rewards for " + modelType + "s"); @@ -204,13 +208,13 @@ public class ProbModelChecker extends StateModelChecker mainLog.println("Building reward structure..."); switch (modelType) { case CTMC: - rews = ((CTMCModelChecker) this).checkRewardFormula(model, modelRewards, expr.getExpression()); + rews = ((CTMCModelChecker) this).checkRewardFormula(model, mcRewards, expr.getExpression()); break; case DTMC: - rews = ((DTMCModelChecker) 
this).checkRewardFormula(model, modelRewards, expr.getExpression()); + rews = ((DTMCModelChecker) this).checkRewardFormula(model, mcRewards, expr.getExpression()); break; case MDP: - rews = ((MDPModelChecker) this).checkRewardFormula(model, expr.getExpression(), min); + rews = ((MDPModelChecker) this).checkRewardFormula(model, mdpRewards, expr.getExpression(), min); break; default: throw new PrismException("Cannot model check " + expr + " for " + modelType + "s"); @@ -237,7 +241,7 @@ public class ProbModelChecker extends StateModelChecker case CTMC: case DTMC: if (rewStr.getNumTransItems() > 0) { - throw new PrismException("Explicit engine does not yet handle transition rewards"); + throw new PrismException("Explicit engine does not yet handle transition rewards for D/CTMCs"); } // Special case: constant rewards if (rewStr.getNumStateItems() == 1 && Expression.isTrue(rewStr.getStates(0)) && rewStr.getReward(0).isConstant()) { @@ -245,9 +249,9 @@ public class ProbModelChecker extends StateModelChecker } // Normal: state rewards else { - MCRewardsStateArray rewSA = new MCRewardsStateArray(model.getNumStates()); numStates = model.getNumStates(); statesList = model.getStatesList(); + MCRewardsStateArray rewSA = new MCRewardsStateArray(numStates); n = rewStr.getNumItems(); for (i = 0; i < n; i++) { guard = rewStr.getStates(i); @@ -264,4 +268,46 @@ public class ProbModelChecker extends StateModelChecker throw new PrismException("Cannot build rewards for " + model.getModelType() + "s"); } } + + private MDPRewards buildMDPRewardStructure(MDP mdp, RewardStruct rewStr) throws PrismException + { + //MCRewards modelRewards = null; + List statesList; + Expression guard; + String action; + Object mdpAction; + int i, j, k, n, numStates, numChoices; + + if (rewStr.getNumStateItems() > 0) { + throw new PrismException("Explicit engine does not yet handle state rewards for MDPs"); + } + // Special case: constant rewards + // TODO + /*if (rewStr.getNumStateItems() == 1 && 
Expression.isTrue(rewStr.getStates(0)) && rewStr.getReward(0).isConstant()) { + return new MCRewardsStateConstant(rewStr.getReward(0).evaluateDouble()); + }*/ + // Normal: transition rewards + else { + numStates = mdp.getNumStates(); + statesList = mdp.getStatesList(); + MDPRewardsSimple rewSimple = new MDPRewardsSimple(numStates); + n = rewStr.getNumItems(); + for (i = 0; i < n; i++) { + guard = rewStr.getStates(i); + action = rewStr.getSynch(i); + for (j = 0; j < numStates; j++) { + if (guard.evaluateBoolean(statesList.get(j))) { + numChoices = mdp.getNumChoices(j); + for (k = 0; k < numChoices; k++) { + mdpAction = mdp.getAction(j, k); + if (mdpAction == null ? (action == null) : mdpAction.equals(action)) { + rewSimple.setTransitionReward(j, k, rewStr.getReward(i).evaluateDouble(statesList.get(j))); + } + } + } + } + } + return rewSimple; + } + } } diff --git a/prism/src/explicit/QuantAbstractRefine.java b/prism/src/explicit/QuantAbstractRefine.java index 7805d64c..27ec4aa7 100644 --- a/prism/src/explicit/QuantAbstractRefine.java +++ b/prism/src/explicit/QuantAbstractRefine.java @@ -801,9 +801,9 @@ public abstract class QuantAbstractRefine switch (abstractionType) { case MDP: if (optimise && refinementNum > 0) { - res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, true, lbSoln, known); + res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, true, lbSoln, known); } else { - res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, true, null, null); + res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, true, null, null); } break; default: @@ -821,9 +821,9 @@ public abstract class QuantAbstractRefine case MDP: if (optimise) { double lbCopy[] = Utils.cloneDoubleArray(lbSoln); - res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, false, lbCopy, known); + res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, false, 
lbCopy, known); } else { - res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, false, null, null); + res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, false, null, null); } break; default: @@ -1021,9 +1021,9 @@ public abstract class QuantAbstractRefine ubLastSoln); break; case EXP_REACH: - lbStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, refineState, target, true, + lbStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, null, refineState, target, true, lbLastSoln); - ubStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, refineState, target, false, + ubStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, null, refineState, target, false, ubLastSoln); break; } diff --git a/prism/src/explicit/rewards/MDPRewards.java b/prism/src/explicit/rewards/MDPRewards.java new file mode 100644 index 00000000..9b88973b --- /dev/null +++ b/prism/src/explicit/rewards/MDPRewards.java @@ -0,0 +1,43 @@ +//============================================================================== +// +// Copyright (c) 2002- +// Authors: +// * Dave Parker (University of Oxford) +// +//------------------------------------------------------------------------------ +// +// This file is part of PRISM. +// +// PRISM is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// PRISM is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with PRISM; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//============================================================================== + +package explicit.rewards; + +/** + * Classes that provide (read) access to explicit-state rewards for an MDP. + */ +public abstract class MDPRewards +{ + /** + * Get the state reward for state {@code s}. + */ + public abstract double getStateReward(int s); + + /** + * Get the transition reward for the {@code i}th choice from state {@code s}. + */ + public abstract double getTransitionReward(int s, int i); +} diff --git a/prism/src/explicit/rewards/MDPRewardsSimple.java b/prism/src/explicit/rewards/MDPRewardsSimple.java new file mode 100644 index 00000000..d114b542 --- /dev/null +++ b/prism/src/explicit/rewards/MDPRewardsSimple.java @@ -0,0 +1,107 @@ +//============================================================================== +// +// Copyright (c) 2002- +// Authors: +// * Dave Parker (University of Oxford) +// +//------------------------------------------------------------------------------ +// +// This file is part of PRISM. +// +// PRISM is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// PRISM is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with PRISM; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//============================================================================== + +package explicit.rewards; + +import java.util.ArrayList; +import java.util.List; + +/** + * Simple explicit-state storage of rewards for an MDP. + * Like the related class MDPSimple, this is not especially efficient, but mutable (in terms of size). + */ +public class MDPRewardsSimple extends MDPRewards +{ + /** Number of states */ + protected int numStates; + /** State rewards **/ + protected List<Double> stateRewards; + /** Transition rewards **/ + protected List<List<Double>> transRewards; + + /** + * Constructor: all zero rewards. + * @param numStates Number of states + */ + public MDPRewardsSimple(int numStates) + { + this.numStates = numStates; + // Initially lists are just null (denoting all 0) + stateRewards = null; + transRewards = null; + } + + // Mutators + + /** + * Set the reward for choice {@code i} of state {@code s} to {@code r}. 
+ */ + public void setTransitionReward(int s, int i, double r) + { + List<Double> list; + // If no rewards array created yet, create it + if (transRewards == null) { + transRewards = new ArrayList<List<Double>>(numStates); + for (int j = 0; j < numStates; j++) + transRewards.add(null); + } + // If no rewards for state s yet, create list + if (transRewards.get(s) == null) { + list = new ArrayList<Double>(); + transRewards.set(s, list); + } else { + list = transRewards.get(s); + } + // If list not big enough, extend + int n = i - list.size() + 1; + if (n > 0) { + for (int j = 0; j < n; j++) { + list.add(0.0); + } + } + // Set reward + list.set(i, r); + } + + // Accessors (for MDPRewards) + + @Override + public double getStateReward(int s) + { + return stateRewards.get(s); + } + + @Override + public double getTransitionReward(int s, int i) + { + List<Double> list; + if (transRewards == null || (list = transRewards.get(s)) == null) + return 0.0; + if (list.size() <= i) + return 0.0; + return list.get(i); + } +}