From 85147f1a71543fa3d23041fc256debf4888c67f5 Mon Sep 17 00:00:00 2001 From: Dave Parker Date: Tue, 5 Jul 2011 22:37:02 +0000 Subject: [PATCH] Explicit engine improvements, mainly MDP rewards: * Explicit engine gets MDP rewards (transition rewards only) from the model * Rewards detached from MDPs (but attached ones still available, e.g. for A-R) * Various bug fixes in MDPSparse, especially wrt rewards * Few code tidies git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@3215 bbc10eb1-c90d-0410-af57-cb519fbb1720 --- prism/src/explicit/MDP.java | 11 +- prism/src/explicit/MDPModelChecker.java | 140 +++--------------- prism/src/explicit/MDPSimple.java | 18 ++- prism/src/explicit/MDPSparse.java | 120 ++++++++------- prism/src/explicit/ProbModelChecker.java | 60 +++++++- prism/src/explicit/QuantAbstractRefine.java | 12 +- prism/src/explicit/rewards/MDPRewards.java | 43 ++++++ .../explicit/rewards/MDPRewardsSimple.java | 107 +++++++++++++ 8 files changed, 316 insertions(+), 195 deletions(-) create mode 100644 prism/src/explicit/rewards/MDPRewards.java create mode 100644 prism/src/explicit/rewards/MDPRewardsSimple.java diff --git a/prism/src/explicit/MDP.java b/prism/src/explicit/MDP.java index 72a3ee03..0cee2140 100644 --- a/prism/src/explicit/MDP.java +++ b/prism/src/explicit/MDP.java @@ -29,6 +29,8 @@ package explicit; import java.util.*; import java.util.Map.Entry; +import explicit.rewards.*; + /** * Interface for classes that provide (read) access to an explicit-state MDP. */ @@ -157,32 +159,35 @@ public interface MDP extends Model * Do a matrix-vector multiplication and sum of action reward followed by min/max, i.e. one step of value iteration. * i.e. 
for all s: result[s] = min/max_k { rew(s) + sum_j P_k(s,j)*vect[j] } * @param vect Vector to multiply by + * @param mdpRewards The rewards * @param min Min or max for (true=min, false=max) * @param result Vector to store result in * @param subset Only do multiplication for these rows (ignored if null) * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null) * @param adv Storage for adversary choice indices (ignored if null) */ - public void mvMultRewMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[]); + public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[]); /** * Do a single row of matrix-vector multiplication and sum of action reward followed by min/max. * i.e. return min/max_k { rew(s) + sum_j P_k(s,j)*vect[j] } * @param s Row index * @param vect Vector to multiply by + * @param mdpRewards The rewards * @param min Min or max for (true=min, false=max) * @param adv Storage for adversary choice indices (ignored if null) */ - public double mvMultRewMinMaxSingle(int s, double vect[], boolean min, int adv[]); + public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[]); /** * Determine which choices result in min/max after a single row of matrix-vector multiplication and sum of action reward. 
* @param s Row index * @param vect Vector to multiply by + * @param mdpRewards The rewards * @param min Min or max (true=min, false=max) * @param val Min or max value to match */ - public List mvMultRewMinMaxSingleChoices(int s, double vect[], boolean min, double val); + public List mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val); /** * Multiply the probability matrix induced by the mdp and {@code adv} diff --git a/prism/src/explicit/MDPModelChecker.java b/prism/src/explicit/MDPModelChecker.java index 64acf8be..d4d78687 100644 --- a/prism/src/explicit/MDPModelChecker.java +++ b/prism/src/explicit/MDPModelChecker.java @@ -32,6 +32,8 @@ import parser.ast.Expression; import parser.ast.ExpressionTemporal; import java.util.Map.Entry; +import explicit.rewards.MDPRewards; + import prism.*; /** @@ -156,7 +158,7 @@ public class MDPModelChecker extends ProbModelChecker /** * Compute rewards for the contents of an R operator. */ - protected StateValues checkRewardFormula(Model model, Expression expr, boolean min) throws PrismException + protected StateValues checkRewardFormula(Model model, MDPRewards modelRewards, Expression expr, boolean min) throws PrismException { StateValues rewards = null; @@ -164,7 +166,7 @@ public class MDPModelChecker extends ProbModelChecker ExpressionTemporal exprTemp = (ExpressionTemporal) expr; switch (exprTemp.getOperator()) { case ExpressionTemporal.R_F: - rewards = checkRewardReach(model, exprTemp, min); + rewards = checkRewardReach(model, modelRewards, exprTemp, min); break; default: throw new PrismException("Explicit engine does not yet handle the " + exprTemp.getOperatorSymbol() + " operator in the R operator"); @@ -180,7 +182,7 @@ public class MDPModelChecker extends ProbModelChecker /** * Compute rewards for a reachability reward operator. 
*/ - protected StateValues checkRewardReach(Model model, ExpressionTemporal expr, boolean min) throws PrismException + protected StateValues checkRewardReach(Model model, MDPRewards modelRewards, ExpressionTemporal expr, boolean min) throws PrismException { BitSet b; StateValues rewards = null; @@ -193,8 +195,7 @@ public class MDPModelChecker extends ProbModelChecker // mainLog.print("\nb = " + JDD.GetNumMintermsString(b1, // allDDRowVars.n())); - //res = computeFracRewardsPolIter((MDP) model, null, null, min); - res = computeReachRewards((MDP) model, b, min); + res = computeReachRewards((MDP) model, modelRewards, b, min); rewards = StateValues.createFromDoubleArray(res.soln); return rewards; @@ -960,25 +961,27 @@ public class MDPModelChecker extends ProbModelChecker /** * Compute expected reachability rewards. * @param mdp The MDP + * @param mdpRewards The rewards * @param target Target states * @param min Min or max rewards (true=min, false=max) */ - public ModelCheckerResult computeReachRewards(MDP mdp, BitSet target, boolean min) throws PrismException + public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min) throws PrismException { - return computeReachRewards(mdp, target, min, null, null); + return computeReachRewards(mdp, mdpRewards, target, min, null, null); } /** * Compute expected reachability rewards. * i.e. compute the min/max reward accumulated to reach a state in {@code target}. * @param mdp The MDP + * @param mdpRewards The rewards * @param target Target states * @param min Min or max rewards (true=min, false=max) * @param init Optionally, an initial solution vector (may be overwritten) * @param known Optionally, a set of states for which the exact answer is known * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values. 
*/ - public ModelCheckerResult computeReachRewards(MDP mdp, BitSet target, boolean min, double init[], BitSet known) throws PrismException + public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min, double init[], BitSet known) throws PrismException { ModelCheckerResult res = null; BitSet inf; @@ -1018,7 +1021,7 @@ public class MDPModelChecker extends ProbModelChecker // Compute rewards switch (solnMethod) { case VALUE_ITERATION: - res = computeReachRewardsValIter(mdp, target, inf, min, init, known); + res = computeReachRewardsValIter(mdp, mdpRewards, target, inf, min, init, known); break; default: throw new PrismException("Unknown MDP solution method " + solnMethod); @@ -1038,6 +1041,7 @@ public class MDPModelChecker extends ProbModelChecker /** * Compute expected reachability rewards using value iteration. * @param mdp The MDP + * @param mdpRewards The rewards * @param target Target states * @param inf States for which reward is infinite * @param min Min or max rewards (true=min, false=max) @@ -1045,7 +1049,7 @@ public class MDPModelChecker extends ProbModelChecker * @param known Optionally, a set of states for which the exact answer is known * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values. 
*/ - protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, BitSet target, BitSet inf, boolean min, double init[], BitSet known) throws PrismException + protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[], BitSet known) throws PrismException { ModelCheckerResult res; BitSet unknown; @@ -1095,7 +1099,7 @@ public class MDPModelChecker extends ProbModelChecker //mainLog.println(soln); iters++; // Matrix-vector multiply and min/max ops - mdp.mvMultRewMinMax(soln, min, soln2, unknown, false, null); + mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, unknown, false, null); // Check termination done = PrismUtils.doublesAreClose(soln, soln2, termCritParam, termCrit == TermCrit.ABSOLUTE); // Swap vectors for next iter @@ -1122,124 +1126,18 @@ public class MDPModelChecker extends ProbModelChecker * (More precisely, list of indices of choices resulting in min/max.) * (Note: indices are guaranteed to be sorted in ascending order.) * @param mdp The MDP + * @param mdpRewards The rewards * @param state The state to generate strategy info for * @param target The set of target states to reach * @param min Min or max rewards (true=min, false=max) * @param lastSoln Vector of values from which to recompute in one iteration */ - public List expReachStrategy(MDP mdp, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException + public List expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException { - double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, min, null); - return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, min, val); + double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, mdpRewards, min, null); + return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val); } - /** - * Compute fractional rewards using policy iteration. 
- * @param mdp: The MDP - * @param min: Min or max probabilities (true=min, false=max) - */ - /*protected ModelCheckerResult computeFracRewardsPolIter(MDP mdp, BitSet no, BitSet yes, boolean min) throws PrismException - { - int n; - long timer; - int adv[]; - final String acts = settings.getString(PrismSettings.PRISM_AR_OPTIONS); - - final Map costMap = new HashMap(); - final Map rewardMap = new HashMap(); - for (String tuple : acts.split("\\s*,\\s*")) { - String[] nameAndValues = tuple.split("\\s*=\\s*"); - assert(nameAndValues.length == 2); - String[] values = nameAndValues[1].split("/"); - assert (values.length == 2); - final String name = "[" + nameAndValues[0] + "]"; - costMap.put(name, Double.parseDouble(values[0])); - rewardMap.put(name, Double.parseDouble(values[1])); - } - - final double costMatrix[][] = new double[mdp.getNumStates()][]; - final double rewardMatrix[][] = new double[mdp.getNumStates()][]; - for (int s = 0; s < mdp.getNumStates(); s++) { - costMatrix[s] = new double[mdp.getNumChoices(s)]; - rewardMatrix[s] = new double[mdp.getNumChoices(s)]; - - for (int a = 0; a < mdp.getNumChoices(s); a++) { - String action = (String) mdp.getAction(s, a); - if (action != null && costMap.containsKey(action)) { - costMatrix[s][a] = costMap.get(action); - rewardMatrix[s][a] = rewardMap.get(action); - } - } - } - - mainLog.println("Started end-component calculation"); - timer = System.currentTimeMillis(); - EndcomponentsOf eco = new EndcomponentsOf(mdp); - List> ecs = eco.getEndComponents(); - mainLog.println("Done with end-component calculation: " + (System.currentTimeMillis() - timer) / 1000.0); - mainLog.println(ecs.size() + " end-components."); - - // Store num states - n = mdp.getNumStates(); - - // Generate initial adversary - adv = new int[n]; - - // Start policy iteration - timer = System.currentTimeMillis(); - mainLog.println("Starting policy iteration (" + (min ? 
"min" : "max") + ")..."); - - // Optimize every end-component separately - for (List ec : ecs) { - // Build the subMDP induced by the end-component - MDP subMDP = new MDPSparse(mdp, ec, eco.getAvailableActions()); - - // Build the cost- and reward matrices induced by the end-components - double[][] subCostMatrix = new double[ec.size()][]; - double[][] subRewardMatrix = new double[ec.size()][]; - for (int i = 0; i < ec.size(); i++) { - List actions = eco.getAvailableActions().get(ec.get(i)); - subCostMatrix[i] = new double[actions.size()]; - subRewardMatrix[i] = new double[actions.size()]; - for (int a = 0; a < actions.size(); a++) { - subCostMatrix[i][a] = costMatrix[ec.get(i)][actions.get(a)]; - subRewardMatrix[i][a] = rewardMatrix[ec.get(i)][actions.get(a)]; - } - } - - // Actually optimize the components - RatioECOptimizer c = new RatioECOptimizer(subMDP, subCostMatrix, subRewardMatrix); - if (! c.isZero()) { - c.computeOptimalStrategy(); - } else { - // We don't need to do anything because isZero already calculated - // a strategy for us - } - // reverse translate the strategy - int[] subAdv = c.getAdv(); - for (int i = 0; i < ec.size(); i++) { - adv[ec.get(i)] = eco.getAvailableActions().get(ec.get(i)).get(subAdv[i]); - } - } - - for (int s = 0; s < mdp.getNumStates(); s++) { - mainLog.println(mdp.getStatesList().get(s) + ": " + mdp.getAction(s, adv[s])); - mainLog.println(" (" + costMatrix[s][adv[s]] + ", " + rewardMatrix[s][adv[s]] + ")"); - Iterator> it = mdp.getTransitionsIterator(s, adv[s]); - while (it.hasNext()) { - Entry next = it.next(); - mainLog.println("\t -> " + next.getKey() + " (" + next.getValue() + ")"); - } - } - - // TODO: Compose strategy - - System.out.println("DONE!!!"); - System.exit(0); - - return null; - }*/ - /** * Simple test program. 
*/ diff --git a/prism/src/explicit/MDPSimple.java b/prism/src/explicit/MDPSimple.java index bd5519a8..20b20421 100644 --- a/prism/src/explicit/MDPSimple.java +++ b/prism/src/explicit/MDPSimple.java @@ -30,6 +30,8 @@ import java.util.*; import java.util.Map.Entry; import java.io.*; +import explicit.rewards.MDPRewards; + import prism.ModelType; import prism.PrismException; import prism.PrismUtils; @@ -853,24 +855,24 @@ public class MDPSimple extends ModelSimple implements MDP } @Override - public void mvMultRewMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[]) + public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[]) { int s; // Loop depends on subset/complement arguments if (subset == null) { for (s = 0; s < numStates; s++) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } else if (complement) { for (s = subset.nextClearBit(0); s < numStates; s = subset.nextClearBit(s + 1)) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } else { for (s = subset.nextSetBit(0); s >= 0; s = subset.nextSetBit(s + 1)) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } } @Override - public double mvMultRewMinMaxSingle(int s, double vect[], boolean min, int adv[]) + public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[]) { int j, k; double d, prob, minmax; @@ -886,7 +888,7 @@ public class MDPSimple extends ModelSimple implements MDP for (Distribution distr : step) { j++; // Compute sum for this distribution - d = getTransitionReward(s, j); + d = mdpRewards != null ? 
mdpRewards.getTransitionReward(s, j) : getTransitionReward(s, j); for (Map.Entry e : distr) { k = (Integer) e.getKey(); prob = (Double) e.getValue(); @@ -902,7 +904,7 @@ public class MDPSimple extends ModelSimple implements MDP } @Override - public List mvMultRewMinMaxSingleChoices(int s, double vect[], boolean min, double val) + public List mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val) { int j, k; double d, prob; @@ -917,7 +919,7 @@ public class MDPSimple extends ModelSimple implements MDP for (Distribution distr : step) { j++; // Compute sum for this distribution - d = getTransitionReward(s, j); + d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j) : getTransitionReward(s, j); for (Map.Entry e : distr) { k = (Integer) e.getKey(); prob = (Double) e.getValue(); diff --git a/prism/src/explicit/MDPSparse.java b/prism/src/explicit/MDPSparse.java index 9646051f..7a4b065d 100644 --- a/prism/src/explicit/MDPSparse.java +++ b/prism/src/explicit/MDPSparse.java @@ -30,6 +30,8 @@ import java.util.*; import java.util.Map.Entry; import java.io.*; +import explicit.rewards.MDPRewards; + import prism.ModelType; import prism.PrismException; import prism.PrismUtils; @@ -54,9 +56,7 @@ public class MDPSparse extends ModelSparse implements MDP protected int rowStarts[]; // Action labels - /** - * Array containing actions; is of size numDistrs - */ + /** Array of action labels for choices (is of size numDistrs) */ protected Object actions[]; // Rewards @@ -79,7 +79,7 @@ public class MDPSparse extends ModelSparse implements MDP { this(mdp, false); } - + /** * Copy constructor for a (sub-)MDP from a given MDP. 
* The states and actions will be indexed as given by the order @@ -89,25 +89,26 @@ public class MDPSparse extends ModelSparse implements MDP * @param states States to copy * @param actions Actions to copy */ - public MDPSparse(MDP mdp, List states, List> actions) { + public MDPSparse(MDP mdp, List states, List> actions) + { initialise(states.size()); for (int in : mdp.getInitialStates()) { addInitialState(in); } - + // statesList = new ArrayList(states.size()); statesList = new ArrayList(); for (int s : states) { statesList.add(mdp.getStatesList().get(s)); } - + numDistrs = 0; numTransitions = 0; maxNumDistrs = 0; - + for (int i = 0; i < states.size(); i++) { int s = states.get(i); - + final int numChoices = actions.get(s).size(); numDistrs += numChoices; if (numChoices > maxNumDistrs) { @@ -117,20 +118,20 @@ public class MDPSparse extends ModelSparse implements MDP numTransitions += mdp.getNumTransitions(s, a); } } - + nonZeros = new double[numTransitions]; cols = new int[numTransitions]; - choiceStarts = new int[numDistrs+1]; - rowStarts = new int[numStates+1]; - this.actions = new Object[numDistrs + 1]; + choiceStarts = new int[numDistrs + 1]; + rowStarts = new int[numStates + 1]; + this.actions = new Object[numDistrs]; int choiceIndex = 0; int colIndex = 0; - + int[] reverseStates = new int[mdp.getNumStates()]; for (int i = 0; i < states.size(); i++) { reverseStates[states.get(i)] = i; } - + for (int i = 0; i < states.size(); i++) { int s = states.get(i); rowStarts[i] = choiceIndex; @@ -144,13 +145,13 @@ public class MDPSparse extends ModelSparse implements MDP cols[colIndex] = reverseStates[next.getKey()]; nonZeros[colIndex] = next.getValue(); colIndex++; - } - } + } + } } - + choiceStarts[numDistrs] = numTransitions; rowStarts[numStates] = numDistrs; - + // TODO copy rewards } @@ -181,12 +182,12 @@ public class MDPSparse extends ModelSparse implements MDP cols = new int[numTransitions]; choiceStarts = new int[numDistrs + 1]; rowStarts = new int[numStates + 
1]; - actions = new Object[numDistrs + 1]; + actions = new Object[numDistrs]; j = k = 0; for (i = 0; i < numStates; i++) { rowStarts[i] = j; for (int l = 0; l < mdp.actions.get(i).size(); l++) { - actions[j + l] = mdp.actions.get(i).get(l); + actions[j + l] = mdp.actions.get(i).get(l); } for (Distribution distr : mdp.trans.get(i)) { choiceStarts[j] = k; @@ -212,8 +213,8 @@ public class MDPSparse extends ModelSparse implements MDP } choiceStarts[numDistrs] = numTransitions; rowStarts[numStates] = numDistrs; - - // TODO: copy actions, rewards + + // TODO: copy rewards } /** @@ -254,9 +255,13 @@ public class MDPSparse extends ModelSparse implements MDP cols = new int[numTransitions]; choiceStarts = new int[numDistrs + 1]; rowStarts = new int[numStates + 1]; + actions = new Object[numDistrs]; j = k = 0; for (i = 0; i < numStates; i++) { rowStarts[i] = j; + for (int l = 0; l < mdp.actions.get(permutInv[i]).size(); l++) { + actions[j + l] = mdp.actions.get(permutInv[i]).get(l); + } for (Distribution distr : mdp.trans.get(permutInv[i])) { choiceStarts[j] = k; for (Map.Entry e : distr) { @@ -281,7 +286,8 @@ public class MDPSparse extends ModelSparse implements MDP } choiceStarts[numDistrs] = numTransitions; rowStarts[numStates] = numDistrs; - // TODO: copy actions, rewards + + // TODO: copy rewards } // Mutators (for ModelSparse) @@ -607,44 +613,52 @@ public class MDPSparse extends ModelSparse implements MDP @Override public Iterator> getTransitionsIterator(final int s, final int i) { - return new Iterator> () { + return new Iterator>() + { final int start = choiceStarts[rowStarts[s] + i]; int col = start; final int end = choiceStarts[rowStarts[s] + i + 1]; - + @Override - public boolean hasNext() { + public boolean hasNext() + { return col < end; } @Override - public Entry next() { + public Entry next() + { assert (col < end); final int i = col; col++; - return new Entry() { + return new Entry() + { int key = cols[i]; double value = nonZeros[i]; - + @Override - public 
Integer getKey() { + public Integer getKey() + { return key; } @Override - public Double getValue() { + public Double getValue() + { return value; } @Override - public Double setValue(Double arg0) { + public Double setValue(Double arg0) + { throw new UnsupportedOperationException(); - } + } }; } @Override - public void remove() { + public void remove() + { throw new UnsupportedOperationException(); } }; @@ -653,8 +667,7 @@ public class MDPSparse extends ModelSparse implements MDP @Override public Object getAction(int s, int i) { - // TODO - return null; + return actions[rowStarts[s] + i]; } @Override @@ -950,38 +963,38 @@ public class MDPSparse extends ModelSparse implements MDP } @Override - public void mvMultRewMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[]) + public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[]) { int s; // Loop depends on subset/complement arguments if (subset == null) { for (s = 0; s < numStates; s++) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } else if (complement) { for (s = subset.nextClearBit(0); s < numStates; s = subset.nextClearBit(s + 1)) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } else { for (s = subset.nextSetBit(0); s >= 0; s = subset.nextSetBit(s + 1)) - result[s] = mvMultRewMinMaxSingle(s, vect, min, adv); + result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv); } } @Override - public double mvMultRewMinMaxSingle(int s, double vect[], boolean min, int adv[]) + public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[]) { int j, k, l1, h1, l2, h2; double d, minmax; boolean first; - + // TODO: implement adv. gen. 
- + minmax = 0; first = true; l1 = rowStarts[s]; h1 = rowStarts[s + 1]; for (j = l1; j < h1; j++) { // Compute sum for this distribution - d = getTransitionReward(s, j); + d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j - l1) : getTransitionReward(s, j - l1); l2 = choiceStarts[j]; h2 = choiceStarts[j + 1]; for (k = l2; k < h2; k++) { @@ -997,7 +1010,7 @@ public class MDPSparse extends ModelSparse implements MDP } @Override - public List mvMultRewMinMaxSingleChoices(int s, double vect[], boolean min, double val) + public List mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val) { int j, k, l1, h1, l2, h2; double d; @@ -1010,7 +1023,7 @@ public class MDPSparse extends ModelSparse implements MDP h1 = rowStarts[s + 1]; for (j = l1; j < h1; j++) { // Compute sum for this distribution - d = getTransitionReward(s, j); + d = mdpRewards != null ? mdpRewards.getTransitionReward(s, j) : getTransitionReward(s, j); l2 = choiceStarts[j]; h2 = choiceStarts[j + 1]; for (k = l2; k < h2; k++) { @@ -1031,6 +1044,7 @@ public class MDPSparse extends ModelSparse implements MDP public String toString() { int i, j, k, l1, h1, l2, h2; + Object o; String s = ""; s = "[ "; for (i = 0; i < numStates; i++) { @@ -1040,7 +1054,12 @@ public class MDPSparse extends ModelSparse implements MDP l1 = rowStarts[i]; h1 = rowStarts[i + 1]; for (j = l1; j < h1; j++) { - s += (j == l1) ? 
"{" : ", {"; + if (j > l1) + s += ","; + o = actions[j]; + if (o != null) + s += o + ":"; + s += "{"; l2 = choiceStarts[j]; h2 = choiceStarts[j + 1]; for (k = l2; k < h2; k++) { @@ -1079,9 +1098,10 @@ public class MDPSparse extends ModelSparse implements MDP // TODO: compare rewards (complicated: null = 0,0,0,0)*/ return true; } - + @Override - public void mvMultRight(int[] states, int[] adv, double[] source, double[] dest) { + public void mvMultRight(int[] states, int[] adv, double[] source, double[] dest) + { for (int s : states) { int j, l2, h2; int k = adv[s]; diff --git a/prism/src/explicit/ProbModelChecker.java b/prism/src/explicit/ProbModelChecker.java index 4909381b..e279298b 100644 --- a/prism/src/explicit/ProbModelChecker.java +++ b/prism/src/explicit/ProbModelChecker.java @@ -142,7 +142,8 @@ public class ProbModelChecker extends StateModelChecker boolean min = false; // For nondeterministic models, are we finding min (true) or max (false) rewards ModelType modelType = model.getModelType(); StateValues rews = null; - MCRewards modelRewards = null; + MCRewards mcRewards = null; + MDPRewards mdpRewards = null; int i; // Get info from reward operator @@ -194,7 +195,10 @@ public class ProbModelChecker extends StateModelChecker switch (modelType) { case CTMC: case DTMC: - modelRewards = buildMCRewardStructure(model, rewStruct); + mcRewards = buildMCRewardStructure(model, rewStruct); + break; + case MDP: + mdpRewards = buildMDPRewardStructure((MDP) model, rewStruct); break; default: throw new PrismException("Cannot build rewards for " + modelType + "s"); @@ -204,13 +208,13 @@ public class ProbModelChecker extends StateModelChecker mainLog.println("Building reward structure..."); switch (modelType) { case CTMC: - rews = ((CTMCModelChecker) this).checkRewardFormula(model, modelRewards, expr.getExpression()); + rews = ((CTMCModelChecker) this).checkRewardFormula(model, mcRewards, expr.getExpression()); break; case DTMC: - rews = ((DTMCModelChecker) 
this).checkRewardFormula(model, modelRewards, expr.getExpression()); + rews = ((DTMCModelChecker) this).checkRewardFormula(model, mcRewards, expr.getExpression()); break; case MDP: - rews = ((MDPModelChecker) this).checkRewardFormula(model, expr.getExpression(), min); + rews = ((MDPModelChecker) this).checkRewardFormula(model, mdpRewards, expr.getExpression(), min); break; default: throw new PrismException("Cannot model check " + expr + " for " + modelType + "s"); @@ -237,7 +241,7 @@ public class ProbModelChecker extends StateModelChecker case CTMC: case DTMC: if (rewStr.getNumTransItems() > 0) { - throw new PrismException("Explicit engine does not yet handle transition rewards"); + throw new PrismException("Explicit engine does not yet handle transition rewards for D/CTMCs"); } // Special case: constant rewards if (rewStr.getNumStateItems() == 1 && Expression.isTrue(rewStr.getStates(0)) && rewStr.getReward(0).isConstant()) { @@ -245,9 +249,9 @@ public class ProbModelChecker extends StateModelChecker } // Normal: state rewards else { - MCRewardsStateArray rewSA = new MCRewardsStateArray(model.getNumStates()); numStates = model.getNumStates(); statesList = model.getStatesList(); + MCRewardsStateArray rewSA = new MCRewardsStateArray(numStates); n = rewStr.getNumItems(); for (i = 0; i < n; i++) { guard = rewStr.getStates(i); @@ -264,4 +268,46 @@ public class ProbModelChecker extends StateModelChecker throw new PrismException("Cannot build rewards for " + model.getModelType() + "s"); } } + + private MDPRewards buildMDPRewardStructure(MDP mdp, RewardStruct rewStr) throws PrismException + { + //MCRewards modelRewards = null; + List statesList; + Expression guard; + String action; + Object mdpAction; + int i, j, k, n, numStates, numChoices; + + if (rewStr.getNumStateItems() > 0) { + throw new PrismException("Explicit engine does not yet handle state rewards for MDPs"); + } + // Special case: constant rewards + // TODO + /*if (rewStr.getNumStateItems() == 1 && 
Expression.isTrue(rewStr.getStates(0)) && rewStr.getReward(0).isConstant()) { + return new MCRewardsStateConstant(rewStr.getReward(0).evaluateDouble()); + }*/ + // Normal: transition rewards + else { + numStates = mdp.getNumStates(); + statesList = mdp.getStatesList(); + MDPRewardsSimple rewSimple = new MDPRewardsSimple(numStates); + n = rewStr.getNumItems(); + for (i = 0; i < n; i++) { + guard = rewStr.getStates(i); + action = rewStr.getSynch(i); + for (j = 0; j < numStates; j++) { + if (guard.evaluateBoolean(statesList.get(j))) { + numChoices = mdp.getNumChoices(j); + for (k = 0; k < numChoices; k++) { + mdpAction = mdp.getAction(j, k); + if (mdpAction == null ? (action == null) : mdpAction.equals(action)) { + rewSimple.setTransitionReward(j, k, rewStr.getReward(i).evaluateDouble(statesList.get(j))); + } + } + } + } + } + return rewSimple; + } + } } diff --git a/prism/src/explicit/QuantAbstractRefine.java b/prism/src/explicit/QuantAbstractRefine.java index 7805d64c..27ec4aa7 100644 --- a/prism/src/explicit/QuantAbstractRefine.java +++ b/prism/src/explicit/QuantAbstractRefine.java @@ -801,9 +801,9 @@ public abstract class QuantAbstractRefine switch (abstractionType) { case MDP: if (optimise && refinementNum > 0) { - res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, true, lbSoln, known); + res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, true, lbSoln, known); } else { - res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, true, null, null); + res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, true, null, null); } break; default: @@ -821,9 +821,9 @@ public abstract class QuantAbstractRefine case MDP: if (optimise) { double lbCopy[] = Utils.cloneDoubleArray(lbSoln); - res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, false, lbCopy, known); + res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, false, 
lbCopy, known); } else { - res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, target, false, null, null); + res = ((MDPModelChecker) mc).computeReachRewards((MDP) abstraction, null, target, false, null, null); } break; default: @@ -1021,9 +1021,9 @@ public abstract class QuantAbstractRefine ubLastSoln); break; case EXP_REACH: - lbStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, refineState, target, true, + lbStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, null, refineState, target, true, lbLastSoln); - ubStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, refineState, target, false, + ubStrat = ((MDPModelChecker) mc).expReachStrategy((MDP) abstraction, null, refineState, target, false, ubLastSoln); break; } diff --git a/prism/src/explicit/rewards/MDPRewards.java b/prism/src/explicit/rewards/MDPRewards.java new file mode 100644 index 00000000..9b88973b --- /dev/null +++ b/prism/src/explicit/rewards/MDPRewards.java @@ -0,0 +1,43 @@ +//============================================================================== +// +// Copyright (c) 2002- +// Authors: +// * Dave Parker (University of Oxford) +// +//------------------------------------------------------------------------------ +// +// This file is part of PRISM. +// +// PRISM is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// PRISM is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with PRISM; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//============================================================================== + +package explicit.rewards; + +/** + * Classes that provide (read) access to explicit-state rewards for an MDP. + */ +public abstract class MDPRewards +{ + /** + * Get the state reward for state {@code s}. + */ + public abstract double getStateReward(int s); + + /** + * Get the transition reward for the {@code i}th choice from state {@code s}. + */ + public abstract double getTransitionReward(int s, int i); +} diff --git a/prism/src/explicit/rewards/MDPRewardsSimple.java b/prism/src/explicit/rewards/MDPRewardsSimple.java new file mode 100644 index 00000000..d114b542 --- /dev/null +++ b/prism/src/explicit/rewards/MDPRewardsSimple.java @@ -0,0 +1,107 @@ +//============================================================================== +// +// Copyright (c) 2002- +// Authors: +// * Dave Parker (University of Oxford) +// +//------------------------------------------------------------------------------ +// +// This file is part of PRISM. +// +// PRISM is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// PRISM is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with PRISM; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//============================================================================== + +package explicit.rewards; + +import java.util.ArrayList; +import java.util.List; + +/** + * Simple explicit-state storage of rewards for an MDP. + * Like the related class MDPSimple, this is not especially efficient, but mutable (in terms of size). + */ +public class MDPRewardsSimple extends MDPRewards +{ + /** Number of states */ + protected int numStates; + /** State rewards **/ + protected List<Double> stateRewards; + /** Transition rewards **/ + protected List<List<Double>> transRewards; + + /** + * Constructor: all zero rewards. + * @param numStates Number of states + */ + public MDPRewardsSimple(int numStates) + { + this.numStates = numStates; + // Initially lists are just null (denoting all 0) + stateRewards = null; + transRewards = null; + } + + // Mutators + + /** + * Set the reward for choice {@code i} of state {@code s} to {@code r}. 
+ */ + public void setTransitionReward(int s, int i, double r) + { + List<Double> list; + // If no rewards array created yet, create it + if (transRewards == null) { + transRewards = new ArrayList<List<Double>>(numStates); + for (int j = 0; j < numStates; j++) + transRewards.add(null); + } + // If no rewards for state s yet, create list + if (transRewards.get(s) == null) { + list = new ArrayList<Double>(); + transRewards.set(s, list); + } else { + list = transRewards.get(s); + } + // If list not big enough, extend + int n = i - list.size() + 1; + if (n > 0) { + for (int j = 0; j < n; j++) { + list.add(0.0); + } + } + // Set reward + list.set(i, r); + } + + // Accessors (for MDPRewards) + + @Override + public double getStateReward(int s) + { + return stateRewards.get(s); + } + + @Override + public double getTransitionReward(int s, int i) + { + List<Double> list; + if (transRewards == null || (list = transRewards.get(s)) == null) + return 0.0; + if (list.size() <= i) + return 0.0; + return list.get(i); + } +}