//==============================================================================
//	
//	Copyright (c) 2002-
//	Authors:
//	* Dave Parker <david.parker@comlab.ox.ac.uk> (University of Oxford)
//	
//------------------------------------------------------------------------------
//	
//	This file is part of PRISM.
//	
//	PRISM is free software; you can redistribute it and/or modify
//	it under the terms of the GNU General Public License as published by
//	the Free Software Foundation; either version 2 of the License, or
//	(at your option) any later version.
//	
//	PRISM is distributed in the hope that it will be useful,
//	but WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//	GNU General Public License for more details.
//	
//	You should have received a copy of the GNU General Public License
//	along with PRISM; if not, write to the Free Software Foundation,
//	Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//	
//==============================================================================

package explicit;

import java.util.Arrays;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.PrimitiveIterator;
import java.util.Vector;

import common.IterableStateSet;
import common.StopWatch;
import parser.VarList;
import parser.ast.Declaration;
import parser.ast.DeclarationIntUnbounded;
import parser.ast.Expression;
import parser.ast.ExpressionTemporal;
import parser.ast.TemporalOperatorBound;
import prism.OptionsIntervalIteration;
import prism.Prism;
import prism.PrismComponent;
import prism.PrismDevNullLog;
import prism.PrismException;
import prism.PrismFileLog;
import prism.PrismLog;
import prism.PrismNotSupportedException;
import prism.PrismSettings;
import prism.PrismUtils;
import strat.MDStrategyArray;
import acceptance.AcceptanceReach;
import acceptance.AcceptanceType;
import common.BitSetAndQueue;
import common.BitSetTools;
import automata.DA;
import common.IntSet;
import common.IterableBitSet;
import explicit.IncomingChoiceRelation.Choice;
import explicit.modelviews.EquivalenceRelationInteger;
import explicit.modelviews.MDPDroppedAllChoices;
import explicit.modelviews.MDPEquiv;
import explicit.rewards.MCRewards;
import explicit.rewards.MCRewardsFromMDPRewards;
import explicit.rewards.MDPRewards;
import explicit.rewards.Rewards;

/**
 * Explicit-state model checker for Markov decision processes (MDPs).
 */
public class MDPModelChecker extends ProbModelChecker
{
	/**
	 * Create a new MDPModelChecker, inherit basic state from parent (unless null).
	 */
	public MDPModelChecker(PrismComponent parent) throws PrismException
	{
		super(parent);

		// PRISM_FAIRNESS
		if (settings != null && settings.getBoolean(PrismSettings.PRISM_FAIRNESS)) {
			throw new PrismNotSupportedException("The explicit engine does not support model checking MDPs under fairness");
		}
	}
	
	// Model checking functions

	@Override
	protected StateValues checkProbPathFormulaSimple(Model model, Expression expr, MinMax minMax, BitSet statesOfInterest) throws PrismException
	{
		// In continuous time case, defer to the standard handling without special treatments for rewards
		if (model.getModelType().continuousTime()) {
			return super.checkProbPathFormulaSimple(model, expr, minMax, statesOfInterest);
		}
		
		expr = Expression.convertSimplePathFormulaToCanonicalForm(expr);
		ExpressionTemporal exprTemp = Expression.getTemporalOperatorForSimplePathFormula(expr);

		if (exprTemp.getBounds().hasRewardBounds() ||
		    exprTemp.getBounds().countTimeBoundsDiscrete() > 1) {
			// We have reward bounds or multiple time / step bounds
			// transform model and expression and recurse
			
			List<TemporalOperatorBound> boundsToReplace = exprTemp.getBounds().getStepBoundsForDiscreteTime();
			if (!boundsToReplace.isEmpty()) {
				// exempt first time bound, is handled by standard simple path formula procedure
				boundsToReplace.remove(0);
			}
			boundsToReplace.addAll(exprTemp.getBounds().getRewardBounds());

			ModelExpressionTransformation<MDP, MDP> transformed = 
			    CounterTransformation.replaceBoundsWithCounters(this, (MDP) model, expr, boundsToReplace, statesOfInterest);
			mainLog.println("\nPerforming actual calculations for\n");
			mainLog.println("MDP:  "+transformed.getTransformedModel().infoString());
			mainLog.println("Formula: "+transformed.getTransformedExpression() +"\n");

			// We can now delegate to ProbModelChecker.checkProbPathFormulaSimple as there is
			// at most one time / step bound remaining
			StateValues svTransformed = super.checkProbPathFormulaSimple(transformed.getTransformedModel(), transformed.getTransformedExpression(), minMax, transformed.getTransformedStatesOfInterest());
			return transformed.projectToOriginalModel(svTransformed);
		} else {
			// We are fine, delegate to ProbModelChecker.checkProbPathFormulaSimple
			return super.checkProbPathFormulaSimple(model, expr, minMax, statesOfInterest);
		}
	}


	@Override
	protected StateValues checkProbPathFormulaLTL(Model model, Expression expr, boolean qual, MinMax minMax, BitSet statesOfInterest) throws PrismException
	{
		LTLModelChecker mcLtl;
		StateValues probsProduct, probs;
		MDPModelChecker mcProduct;
		LTLModelChecker.LTLProduct<MDP> product;

		// For min probabilities, need to negate the formula
		// (add parentheses to allow re-parsing if required)
		if (minMax.isMin()) {
			expr = Expression.Not(Expression.Parenth(expr.deepCopy()));
		}

		// For LTL model checking routines
		mcLtl = new LTLModelChecker(this);

		// Build product of MDP and automaton
		AcceptanceType[] allowedAcceptance = {
				AcceptanceType.REACH,
				AcceptanceType.BUCHI,
				AcceptanceType.RABIN,
				AcceptanceType.GENERALIZED_RABIN,
				AcceptanceType.STREETT
		};
		product = mcLtl.constructProductMDP(this, (MDP)model, expr, statesOfInterest, allowedAcceptance);
		
		// Output product, if required
		if (getExportProductTrans()) {
				mainLog.println("\nExporting product transition matrix to file \"" + getExportProductTransFilename() + "\"...");
				product.getProductModel().exportToPrismExplicitTra(getExportProductTransFilename());
		}
		if (getExportProductStates()) {
			mainLog.println("\nExporting product state space to file \"" + getExportProductStatesFilename() + "\"...");
			PrismFileLog out = new PrismFileLog(getExportProductStatesFilename());
			VarList newVarList = (VarList) modulesFile.createVarList().clone();
			String daVar = "_da";
			while (newVarList.getIndex(daVar) != -1) {
				daVar = "_" + daVar;
			}
			newVarList.addVar(0, new Declaration(daVar, new DeclarationIntUnbounded()), 1, null);
			product.getProductModel().exportStates(Prism.EXPORT_PLAIN, newVarList, out);
			out.close();
		}
		
		// Find accepting states + compute reachability probabilities
		BitSet acc;
		if (product.getAcceptance() instanceof AcceptanceReach) {
			mainLog.println("\nSkipping accepting MEC computation since acceptance is defined via goal states...");
			acc = ((AcceptanceReach)product.getAcceptance()).getGoalStates();
		} else {
			mainLog.println("\nFinding accepting MECs...");
			acc = mcLtl.findAcceptingECStates(product.getProductModel(), product.getAcceptance());
		}
		mainLog.println("\nComputing reachability probabilities...");
		mcProduct = new MDPModelChecker(this);
		mcProduct.inheritSettings(this);
		ModelCheckerResult res = mcProduct.computeReachProbs((MDP) product.getProductModel(), acc, false);
		probsProduct = StateValues.createFromDoubleArray(res.soln, product.getProductModel());

		// Subtract from 1 if we're model checking a negated formula for regular Pmin
		if (minMax.isMin()) {
			probsProduct.timesConstant(-1.0);
			probsProduct.plusConstant(1.0);
		}

		// Output vector over product, if required
		if (getExportProductVector()) {
				mainLog.println("\nExporting product solution vector matrix to file \"" + getExportProductVectorFilename() + "\"...");
				PrismFileLog out = new PrismFileLog(getExportProductVectorFilename());
				probsProduct.print(out, false, false, false, false);
				out.close();
		}
		
		// Mapping probabilities in the original model
		probs = product.projectToOriginalModel(probsProduct);
		probsProduct.clear();

		return probs;
	}

	/**
	 * Compute rewards for a co-safe LTL reward operator.
	 */
	protected StateValues checkRewardCoSafeLTL(Model model, Rewards modelRewards, Expression expr, MinMax minMax, BitSet statesOfInterest) throws PrismException
	{
		LTLModelChecker mcLtl;
		MDPRewards productRewards;
		StateValues rewardsProduct, rewards;
		MDPModelChecker mcProduct;
		LTLModelChecker.LTLProduct<MDP> product;

		// For LTL model checking routines
		mcLtl = new LTLModelChecker(this);

		// Model check maximal state formulas and construct DFA, with the special
		// handling needed for cosafety reward translation
		Vector<BitSet> labelBS = new Vector<BitSet>();
		DA<BitSet, AcceptanceReach> da = mcLtl.constructDFAForCosafetyRewardLTL(this, model, expr, labelBS);

		StopWatch timer = new StopWatch(getLog());
		mainLog.println("\nConstructing " + model.getModelType() + "-" + da.getAutomataType() + " product...");
		timer.start(model.getModelType() + "-" + da.getAutomataType() + " product");
		product = mcLtl.constructProductModel(da, (MDP)model, labelBS, statesOfInterest);
		timer.stop("product has " + product.getProductModel().infoString());

		// Adapt reward info to product model
		productRewards = ((MDPRewards) modelRewards).liftFromModel(product);

		// Output product, if required
		if (getExportProductTrans()) {
				mainLog.println("\nExporting product transition matrix to file \"" + getExportProductTransFilename() + "\"...");
				product.getProductModel().exportToPrismExplicitTra(getExportProductTransFilename());
		}
		if (getExportProductStates()) {
			mainLog.println("\nExporting product state space to file \"" + getExportProductStatesFilename() + "\"...");
			PrismFileLog out = new PrismFileLog(getExportProductStatesFilename());
			VarList newVarList = (VarList) modulesFile.createVarList().clone();
			String daVar = "_da";
			while (newVarList.getIndex(daVar) != -1) {
				daVar = "_" + daVar;
			}
			newVarList.addVar(0, new Declaration(daVar, new DeclarationIntUnbounded()), 1, null);
			product.getProductModel().exportStates(Prism.EXPORT_PLAIN, newVarList, out);
			out.close();
		}

		// Find accepting states + compute reachability rewards
		BitSet acc = ((AcceptanceReach)product.getAcceptance()).getGoalStates();

		mainLog.println("\nComputing reachability rewards...");
		mcProduct = new MDPModelChecker(this);
		mcProduct.inheritSettings(this);
		ModelCheckerResult res = mcProduct.computeReachRewards((MDP)product.getProductModel(), productRewards, acc, minMax.isMin());
		rewardsProduct = StateValues.createFromDoubleArray(res.soln, product.getProductModel());

		// Output vector over product, if required
		if (getExportProductVector()) {
				mainLog.println("\nExporting product solution vector matrix to file \"" + getExportProductVectorFilename() + "\"...");
				PrismFileLog out = new PrismFileLog(getExportProductVectorFilename());
				rewardsProduct.print(out, false, false, false, false);
				out.close();
		}

		// Mapping rewards in the original model
		rewards = product.projectToOriginalModel(rewardsProduct);
		rewardsProduct.clear();

		return rewards;
	}
	
	// Numerical computation functions

	/**
	 * Compute next=state probabilities.
	 * i.e. compute the probability of being in a state in {@code target} in the next step.
	 * @param mdp The MDP
	 * @param target Target states
	 * @param min Min or max probabilities (true=min, false=max)
	 */
	public ModelCheckerResult computeNextProbs(MDP mdp, BitSet target, boolean min) throws PrismException
	{
		ModelCheckerResult res = null;
		int n;
		double soln[], soln2[];
		long timer;

		timer = System.currentTimeMillis();

		// Store num states
		n = mdp.getNumStates();

		// Create/initialise solution vector(s)
		soln = Utils.bitsetToDoubleArray(target, n);
		soln2 = new double[n];

		// Next-step probabilities 
		mdp.mvMultMinMax(soln, min, soln2, null, false, null);

		// Return results
		res = new ModelCheckerResult();
		res.soln = soln2;
		res.numIters = 1;
		res.timeTaken = timer / 1000.0;
		return res;
	}

	/**
	 * Given a value vector x, compute the probability:
	 *   v(s) = min/max sched [ Sum_s' P_sched(s,s')*x(s') ]  for s labeled with a,
	 *   v(s) = 0   for s not labeled with a.
	 *
	 * Clears the StateValues object x.
	 *
	 * @param tr the transition matrix
	 * @param a the set of states labeled with a
	 * @param x the value vector
	 * @param min compute min instead of max
	 */
	public double[] computeRestrictedNext(MDP mdp, BitSet a, double[] x, boolean min)
	{
		int n;
		double soln[];

		// Store num states
		n = mdp.getNumStates();

		// initialized to 0.0
		soln = new double[n];

		// Next-step probabilities multiplication
		// restricted to a states
		mdp.mvMultMinMax(x, min, soln, a, false, null);

		return soln;
	}

	/**
	 * Compute reachability probabilities.
	 * i.e. compute the min/max probability of reaching a state in {@code target}.
	 * @param mdp The MDP
	 * @param target Target states
	 * @param min Min or max probabilities (true=min, false=max)
	 */
	public ModelCheckerResult computeReachProbs(MDP mdp, BitSet target, boolean min) throws PrismException
	{
		return computeReachProbs(mdp, null, target, min, null, null);
	}

	/**
	 * Compute until probabilities.
	 * i.e. compute the min/max probability of reaching a state in {@code target},
	 * while remaining in those in {@code remain}.
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param min Min or max probabilities (true=min, false=max)
	 */
	public ModelCheckerResult computeUntilProbs(MDP mdp, BitSet remain, BitSet target, boolean min) throws PrismException
	{
		return computeReachProbs(mdp, remain, target, min, null, null);
	}

	/**
	 * Compute reachability/until probabilities.
	 * i.e. compute the min/max probability of reaching a state in {@code target},
	 * while remaining in those in {@code remain}.
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param init Optionally, an initial solution vector (may be overwritten) 
	 * @param known Optionally, a set of states for which the exact answer is known
	 * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values).
	 * Also, 'known' values cannot be passed for some solution methods, e.g. policy iteration.  
	 */
	public ModelCheckerResult computeReachProbs(MDP mdp, BitSet remain, BitSet target, boolean min, double init[], BitSet known) throws PrismException
	{
		ModelCheckerResult res = null;
		BitSet no, yes;
		int n, numYes, numNo;
		long timer, timerProb0, timerProb1;
		int strat[] = null;
		PredecessorRelation pre = null;
		// Local copy of setting
		MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;

		boolean doPmaxQuotient = this.doPmaxQuotient;

		// Switch to a supported method, if necessary
		if (mdpSolnMethod == MDPSolnMethod.LINEAR_PROGRAMMING) {
			mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL;
			mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\"");
		}

		// Check for some unsupported combinations
		if (mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION && valIterDir == ValIterDir.ABOVE) {
			if (!(precomp && prob0))
				throw new PrismException("Precomputation (Prob0) must be enabled for value iteration from above");
			if (!min)
				throw new PrismException("Value iteration from above only works for minimum probabilities");
		}
		if (doIntervalIteration) {
			if (!min && (genStrat || exportAdv)) {
				throw new PrismNotSupportedException("Currently, explicit engine does not support adversary construction for interval iteration and Pmax");
			}
			if (mdpSolnMethod != MDPSolnMethod.VALUE_ITERATION && mdpSolnMethod != MDPSolnMethod.GAUSS_SEIDEL) {
				throw new PrismNotSupportedException("Currently, explicit engine only supports interval iteration with value iteration or Gauss-Seidel for MDPs");
			}
			if (init != null)
				throw new PrismNotSupportedException("Interval iteration currently not supported with provided initial values");
			if (!(precomp && prob0 && prob1)) {
				throw new PrismNotSupportedException("Precomputations (Prob0 & Prob1) must be enabled for interval iteration");
			}

			if (!min) {
				doPmaxQuotient = true;
			}
		}
		if (mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION || mdpSolnMethod == MDPSolnMethod.MODIFIED_POLICY_ITERATION) {
			if (known != null) {
				throw new PrismException("Policy iteration methods cannot be passed 'known' values for some states");
			}
		}

		if (doPmaxQuotient && min) {
			// for Pmin, don't do quotient
			doPmaxQuotient = false;
		}

		// Start probabilistic reachability
		timer = System.currentTimeMillis();
		mainLog.println("\nStarting probabilistic reachability (" + (min ? "min" : "max") + ")...");

		// Check for deadlocks in non-target state (because breaks e.g. prob1)
		mdp.checkForDeadlocks(target);

		// Store num states
		n = mdp.getNumStates();

		// Optimise by enlarging target set (if more info is available)
		if (init != null && known != null && !known.isEmpty()) {
			BitSet targetNew = (BitSet) target.clone();
			for (int i : new IterableBitSet(known)) {
				if (init[i] == 1.0) {
					targetNew.set(i);
				}
			}
			target = targetNew;
		}

		// If required, export info about target states 
		if (getExportTarget()) {
			BitSet bsInit = new BitSet(n);
			for (int i = 0; i < n; i++) {
				bsInit.set(i, mdp.isInitialState(i));
			}
			List<BitSet> labels = Arrays.asList(bsInit, target);
			List<String> labelNames = Arrays.asList("init", "target");
			mainLog.println("\nExporting target states info to file \"" + getExportTargetFilename() + "\"...");
			exportLabels(mdp, labels, labelNames, Prism.EXPORT_PLAIN, new PrismFileLog(getExportTargetFilename()));
		}

		// If required, create/initialise strategy storage
		// Set choices to -1, denoting unknown
		// (except for target states, which are -2, denoting arbitrary)
		if (genStrat || exportAdv) {
			strat = new int[n];
			for (int i = 0; i < n; i++) {
				strat[i] = target.get(i) ? -2 : -1;
			}
		}

		if (precomp && (prob0 || prob1) && preRel) {
			pre = mdp.getPredecessorRelation(this, true);
		}

		// Precomputation
		timerProb0 = System.currentTimeMillis();
		if (precomp && prob0) {
			if (pre != null) {
				no = prob0(mdp, remain, target, min, strat, pre);
			} else {
				no = prob0(mdp, remain, target, min, strat);
			}
		} else {
			no = new BitSet();
		}
		timerProb0 = System.currentTimeMillis() - timerProb0;
		timerProb1 = System.currentTimeMillis();
		if (precomp && prob1) {
			if (pre != null) {
				yes = prob1(mdp, remain, target, min, strat, pre);
			} else {
				yes = prob1(mdp, remain, target, min, strat);
			}
		} else {
			yes = (BitSet) target.clone();
		}
		timerProb1 = System.currentTimeMillis() - timerProb1;

		// Print results of precomputation
		numYes = yes.cardinality();
		numNo = no.cardinality();
		mainLog.println("target=" + target.cardinality() + ", yes=" + numYes + ", no=" + numNo + ", maybe=" + (n - (numYes + numNo)));

		// If still required, store strategy for no/yes (0/1) states.
		// This is just for the cases max=0 and min=1, where arbitrary choices suffice (denoted by -2)
		if (genStrat || exportAdv) {
			if (min) {
				for (int i = yes.nextSetBit(0); i >= 0; i = yes.nextSetBit(i + 1)) {
					if (!target.get(i))
						strat[i] = -2;
				}
			} else {
				for (int i = no.nextSetBit(0); i >= 0; i = no.nextSetBit(i + 1)) {
					strat[i] = -2;
				}
			}
		}

		// Compute probabilities (if needed)
		if (numYes + numNo < n) {

			if (!min && doPmaxQuotient) {
				MDPEquiv maxQuotient = maxQuotient(mdp, yes, no);
				// MDPEquiv retains original state space, making the states that are not used
				// trap states.
				// yesInQuotient is the representative for the yes equivalence class
				BitSet yesInQuotient = new BitSet();
				yesInQuotient.set(maxQuotient.mapStateToRestrictedModel(yes.nextSetBit(0)));
				// noInQuotient is the representative for the no equivalence class as well
				// as the non-representative states (the states in any equivalence class
				// that are not the representative for the class). As the latter states
				// are traps, we can just add them to the no set
				BitSet noInQuotient = new BitSet();
				noInQuotient.set(maxQuotient.mapStateToRestrictedModel(no.nextSetBit(0)));
				noInQuotient.or(maxQuotient.getNonRepresentativeStates());
				MDPSparse quotientModel = new MDPSparse(maxQuotient);

				ModelCheckerResult res1 = computeReachProbsNumeric(quotientModel,
				                                                   mdpSolnMethod,
				                                                   noInQuotient,
				                                                   yesInQuotient,
				                                                   min,
				                                                   init,
				                                                   known,
				                                                   strat);

				res = new ModelCheckerResult();
				res.numIters = res1.numIters;
				res.timeTaken = res1.timeTaken;
				res.soln = new double[mdp.getNumStates()];
				for (int i = 0; i < n; i++) {
					if (yes.get(i)) {
						res.soln[i] = 1.0;
					} else if (no.get(i)) {
						res.soln[i] = 0.0;
					} else {
						res.soln[i] = res1.soln[maxQuotient.mapStateToRestrictedModel(i)];
					}
				}
			} else {
				res = computeReachProbsNumeric(mdp, mdpSolnMethod, no, yes, min, init, known, strat);
			}
		} else {
			res = new ModelCheckerResult();
			res.soln = Utils.bitsetToDoubleArray(yes, n);
		}

		// Finished probabilistic reachability
		timer = System.currentTimeMillis() - timer;
		mainLog.println("Probabilistic reachability took " + timer / 1000.0 + " seconds.");

		// Store strategy
		if (genStrat) {
			res.strat = new MDStrategyArray(mdp, strat);
		}
		// Export adversary
		if (exportAdv) {
			// Prune strategy, if needed
			if (getRestrictStratToReach()) {
				restrictStrategyToReachableStates(mdp, strat);
			}
			// Export
			PrismLog out = new PrismFileLog(exportAdvFilename);
			new DTMCFromMDPMemorylessAdversary(mdp, strat).exportToPrismExplicitTra(out);
			out.close();
		}

		// Update time taken
		res.timeTaken = timer / 1000.0;
		res.timeProb0 = timerProb0 / 1000.0;
		res.timePre = (timerProb0 + timerProb1) / 1000.0;

		return res;
	}

	protected ModelCheckerResult computeReachProbsNumeric(MDP mdp, MDPSolnMethod method, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[]) throws PrismException
	{
		ModelCheckerResult res = null;

		IterationMethod iterationMethod = null;
		switch (method) {
		case VALUE_ITERATION:
			iterationMethod = new IterationMethodPower(termCrit == TermCrit.ABSOLUTE, termCritParam);
			break;
		case GAUSS_SEIDEL:
			iterationMethod = new IterationMethodGS(termCrit == TermCrit.ABSOLUTE, termCritParam, false);
			break;
		case POLICY_ITERATION:
			if (doIntervalIteration) {
				throw new PrismNotSupportedException("Interval iteration currently not supported for policy iteration");
			}
			res = computeReachProbsPolIter(mdp, no, yes, min, strat);
			break;
		case MODIFIED_POLICY_ITERATION:
			if (doIntervalIteration) {
				throw new PrismNotSupportedException("Interval iteration currently not supported for policy iteration");
			}
			res = computeReachProbsModPolIter(mdp, no, yes, min, strat);
			break;
		default:
			throw new PrismException("Unknown MDP solution method " + mdpSolnMethod.fullName());
		}

		if (res == null) { // not yet computed, use iterationMethod
			if (!doIntervalIteration) {
				res = doValueIterationReachProbs(mdp, no, yes, min, init, known, iterationMethod, getDoTopologicalValueIteration(), strat);
			} else {
				res = doIntervalIterationReachProbs(mdp, no, yes, min, init, known, iterationMethod, getDoTopologicalValueIteration(), strat);
			}
		}

		return res;
	}

	/**
	 * Prob0 precomputation algorithm (using fixpoint computation).
	 * i.e. determine the states of an MDP which, with min/max probability 0,
	 * reach a state in {@code target}, while remaining in those in {@code remain}.
	 * {@code min}=true gives Prob0E, {@code min}=false gives Prob0A. 
	 * Optionally, for min only, store optimal (memoryless) strategy info for 0 states. 
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 */
	public BitSet prob0(MDPGeneric<?> mdp, BitSet remain, BitSet target, boolean min, int strat[])
	{
		int n, iters;
		BitSet u, soln, unknown;
		boolean u_done;
		long timer;

		// Start precomputation
		timer = System.currentTimeMillis();
		if (!silentPrecomputations)
			mainLog.println("Starting Prob0 (" + (min ? "min" : "max") + ")...");

		// Special case: no target states
		if (target.cardinality() == 0) {
			soln = new BitSet(mdp.getNumStates());
			soln.set(0, mdp.getNumStates());

			// for min, generate strategy, any choice (-2) is fine
			if (min && strat != null) {
				Arrays.fill(strat, -2);
			}
			return soln;
		}

		// Initialise vectors
		n = mdp.getNumStates();
		u = new BitSet(n);
		soln = new BitSet(n);

		// Determine set of states actually need to perform computation for
		unknown = new BitSet();
		unknown.set(0, n);
		unknown.andNot(target);
		if (remain != null)
			unknown.and(remain);

		// Fixed point loop
		iters = 0;
		u_done = false;
		// Least fixed point - should start from 0 but we optimise by
		// starting from 'target', thus bypassing first iteration
		u.or(target);
		soln.or(target);
		while (!u_done) {
			iters++;
			// Single step of Prob0
			mdp.prob0step(unknown, u, min, soln);
			// Check termination
			u_done = soln.equals(u);
			// u = soln
			u.clear();
			u.or(soln);
		}

		// Negate
		u.flip(0, n);

		// Finished precomputation
		timer = System.currentTimeMillis() - timer;
		if (!silentPrecomputations) {
			mainLog.print("Prob0 (" + (min ? "min" : "max") + ")");
			mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
		}

		// If required, generate strategy. This is for min probs,
		// so it can be done *after* the main prob0 algorithm (unlike for prob1).
		// We simply pick, for all "no" states, the first choice for which all transitions stay in "no"
		if (strat != null) {
			for (int i = u.nextSetBit(0); i >= 0; i = u.nextSetBit(i + 1)) {
				int numChoices = mdp.getNumChoices(i);
				for (int k = 0; k < numChoices; k++) {
					if (mdp.allSuccessorsInSet(i, k, u)) {
						strat[i] = k;
						continue;
					}
				}
			}
		}

		return u;
	}

	/**
	 * Prob0 precomputation algorithm (using predecessor relation).
	 * i.e. determine the states of an MDP which, with min/max probability 0,
	 * reach a state in {@code target}, while remaining in those in {@code remain}.
	 * {@code min}=true gives Prob0E, {@code min}=false gives Prob0A.
	 * Optionally, for min only, store optimal (memoryless) strategy info for 0 states.
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * @param pre the predecessor relation
	 */
	public BitSet prob0(MDP mdp, BitSet remain, BitSet target, boolean min, int strat[], PredecessorRelation pre)
	{
		int n;
		BitSet result, unknown;
		long timer;

		// Start precomputation
		timer = System.currentTimeMillis();
		mainLog.println("Starting Prob0 (" + (min ? "min" : "max") + ")...");

		// Special case: no target states -> probability = 0 everywhere
		if (target.isEmpty()) {
			result = new BitSet(mdp.getNumStates());
			result.set(0, mdp.getNumStates());
			return result;
		}

		// Initialise vectors
		n = mdp.getNumStates();

		// Determine set of states actually need to perform computation for
		unknown = BitSetTools.complement(n, target);
		if (remain != null)
			unknown.and(remain);

		if (min) {
			BitSet T = (BitSet) target.clone();
			BitSet R = (BitSet) target.clone();

			while (!R.isEmpty()) {
				int t = R.nextSetBit(0);
				R.clear(t);

				for (int s : pre.getPre(t)) {
					if (!unknown.get(s) || T.get(s)) continue;

					boolean forAllSomeInT = true;
					for (int choice = 0, choices = mdp.getNumChoices(s); choice < choices; choice++) {
						boolean someInT = mdp.someSuccessorsInSet(s, choice, T);
						if (!someInT) {
							forAllSomeInT = false;
							break;
						}
					}

					if (forAllSomeInT) {
						T.set(s);
						R.set(s);
					}
				}
			}

			result = T;
			// result = S \ T
			result.flip(0, n);
		} else {
			// E [ remain U target ]
			result = pre.calculatePreStar(remain, target, target);
			// Pmax=0 <=> ! E [ remain U target ]  -> complement
			result.flip(0, n);
		}


		// Finished precomputation
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Prob0 (" + (min ? "min" : "max") + ")");
		mainLog.println(" took " + timer / 1000.0 + " seconds.");

		// If required, generate strategy. This is for min probs,
		// so it can be done *after* the main prob0 algorithm (unlike for prob1).
		// We simply pick, for all "no" states, the first choice for which all transitions stay in "no"
		if (strat != null) {
			for (int i = result.nextSetBit(0); i >= 0; i = result.nextSetBit(i + 1)) {
				int numChoices = mdp.getNumChoices(i);
				for (int k = 0; k < numChoices; k++) {
					if (mdp.allSuccessorsInSet(i, k, result)) {
						strat[i] = k;
						continue;
					}
				}
			}
		}

		return result;
	}

	/**
	 * Prob1 precomputation algorithm (using fixpoint computation).
	 * i.e. determine the states of an MDP which, with min/max probability 1,
	 * reach a state in {@code target}, while remaining in those in {@code remain}.
	 * {@code min}=true gives Prob1A, {@code min}=false gives Prob1E.
	 * Optionally, for max only, store optimal (memoryless) strategy info for 1 states.
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 */
	public BitSet prob1(MDPGeneric<?> mdp, BitSet remain, BitSet target, boolean min, int strat[])
	{
		int n, iters;
		BitSet u, v, soln, unknown;
		boolean u_done, v_done;
		long timer;

		// Start precomputation
		timer = System.currentTimeMillis();
		if (!silentPrecomputations)
			mainLog.println("Starting Prob1 (" + (min ? "min" : "max") + ")...");

		// Special case: no target states
		if (target.cardinality() == 0) {
			return new BitSet(mdp.getNumStates());
		}

		// Initialise vectors
		n = mdp.getNumStates();
		u = new BitSet(n);
		v = new BitSet(n);
		soln = new BitSet(n);

		// Determine set of states actually need to perform computation for
		unknown = new BitSet();
		unknown.set(0, n);
		unknown.andNot(target);
		if (remain != null)
			unknown.and(remain);

		// Nested fixed point loop
		iters = 0;
		u_done = false;
		// Greatest fixed point
		u.set(0, n);
		while (!u_done) {
			v_done = false;
			// Least fixed point - should start from 0 but we optimise by
			// starting from 'target', thus bypassing first iteration
			v.clear();
			v.or(target);
			soln.clear();
			soln.or(target);
			while (!v_done) {
				iters++;
				// Single step of Prob1
				if (min)
					mdp.prob1Astep(unknown, u, v, soln);
				else
					mdp.prob1Estep(unknown, u, v, soln, null);
				// Check termination (inner)
				v_done = soln.equals(v);
				// v = soln
				v.clear();
				v.or(soln);
			}
			// Check termination (outer)
			u_done = v.equals(u);
			// u = v
			u.clear();
			u.or(v);
		}

		// If we need to generate a strategy, do another iteration of the inner loop for this
		// We could do this during the main double fixed point above, but we would generate surplus
		// strategy info for non-1 states during early iterations of the outer loop,
		// which are not straightforward to remove since this method does not know which states
		// already have valid strategy info from Prob0.
		// Notice that we only need to look at states in u (since we already know the answer),
		// so we restrict 'unknown' further 
		unknown.and(u);
		if (!min && strat != null) {
			v_done = false;
			v.clear();
			v.or(target);
			soln.clear();
			soln.or(target);
			while (!v_done) {
				mdp.prob1Estep(unknown, u, v, soln, strat);
				v_done = soln.equals(v);
				v.clear();
				v.or(soln);
			}
			u_done = v.equals(u);
		}

		// Finished precomputation
		timer = System.currentTimeMillis() - timer;
		if (!silentPrecomputations) {
			mainLog.print("Prob1 (" + (min ? "min" : "max") + ")");
			mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");
		}

		return u;
	}

	/**
	 * Prob1 precomputation algorithm (using predecessor relation).
	 * i.e. determine the states of an MDP which, with min/max probability 1,
	 * reach a state in {@code target}, while remaining in those in {@code remain}.
	 * {@code min}=true gives Prob1A, {@code min}=false gives Prob1E.
	 * Optionally, for max only, store optimal (memoryless) strategy info for 1 states.
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * @param pre the predecessor relation
	 */
	public BitSet prob1(MDP mdp, BitSet remain, BitSet target, boolean min, int strat[], PredecessorRelation pre)
	{
		BitSet result;
		long timer;

		// Start precomputation
		timer = System.currentTimeMillis();
		mainLog.println("Starting Prob1 (" + (min ? "min" : "max") + ")...");

		// Special case: no target states -> probability = 0 everywhere
		if (target.isEmpty()) {
			return new BitSet();
		}

		if (min) {
			result = prob1a(mdp, remain, target, pre);
		} else {
			result = prob1e(mdp, remain, target, strat, pre);
		}

		// Finished precomputation
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Prob1 (" + (min ? "min" : "max") + ")");
		mainLog.println(" took " + timer / 1000.0 + " seconds.");

		return result;
	}

	/**
	 * Prob1A (Pmin=1) precomputation algorithm (using predecessor relation).
	 * Determines the states of an MDP which, with min probability 1,
	 * reach a state in {@code target}, while remaining in those in {@code remain}.
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param pre predecessor relation
	 * @return the set of states with Pmin=1[ remain U target ]
	 */
	private BitSet prob1a(MDP mdp, BitSet remain, BitSet target, PredecessorRelation pre)
	{
		// this is an adaption of the Smin=1 algorithm in the Principles of Model Checking book
		// with added support for constrained reachability (remain U target)

		int n = mdp.getNumStates();

		// construct explicit set of remaining state in case that remain
		// is given implicitely (null = all states)
		if (remain == null) {
			remain = new BitSet();
			remain.set(0,n);
		}

		// Z
		// = (S \ remain) \ target
		// = the states that satisfy neither
		// remain nor target, i.e., where we know
		// a priori that they have probability 0,
		// as E[ remain U target ] is definitely false
		BitSet Z = new BitSet();
		Z.set(0,n);
		Z.andNot(remain);
		Z.andNot(target);

		// mainLog.println("Z = " + Z);

		// The set of states that are not target states
		BitSet notTarget = BitSetTools.complement(n, target);
		// mainLog.println("notTarget = " + notTarget);

		// The set of states that can reach Z without visiting
		// any target states.
		// We know that for those states Pmin<1, as we can
		// reach a Z state with positive probability
		BitSet canReachZ = pre.calculatePreStar(notTarget, Z, Z);
		// mainLog.println("canReachZ = " + canReachZ);

		// We now iteratively compute the set T of states
		// in !canReachZ that have a strategy of avoiding
		// to reach the target states ("safe states")

		// B = unsafe states, have Pmin>0 for reaching target
		// initialised with the target states
		BitSet B = (BitSet) target.clone();

		// T = potentially safe states, initializes with
		// ( S \ B ) \ canReachZ
		BitSet T = BitSetTools.complement(n, B);
		T.andNot(canReachZ);

		// E = unsafe states that have to be removed from T yet
		BitSetAndQueue E = new BitSetAndQueue(B);
		while (!E.isEmpty()) {
			int t = E.dequeue();

			// for removal, look at the predecessors
			// and remove choices that will lead to B with probability > 0
			for (int s : pre.getPre(t)) {
				// predecessor already in B, continue
				if (B.get(s)) {
					continue;
				}

				// is there a choice that allows to avoid B?
				boolean existsChoiceAvoidingB = false;

				for (int choice = 0, choices = mdp.getNumChoices(s); choice < choices; choice++) {
					if (!mdp.someSuccessorsInSet(s, choice, B)) {
						existsChoiceAvoidingB = true;
						break;
					}
				}

				// if there is no such choice, we have to remove s
				if (!existsChoiceAvoidingB) {
					// add to queue
					E.enqueue(s);
					// add to unsafe states B
					B.set(s);
					// remove from safe states T
					T.clear(s);
				}
			}
		}

		// The states in remain that are not target states
		BitSet remainAndNotTarget = BitSetTools.minus(remain, target);

		// spoilerStates = all states in T and all states that can reach Z without visiting B
		BitSet spoilerStates = BitSetTools.union(T, canReachZ);

		// canReachSpoilerStates = there exists strategy to reach a spoiler state with positive
		// probability => Pmin<1 ( remain U target )
		BitSet canReachSpoilerState = pre.calculatePreStar(remainAndNotTarget, spoilerStates, spoilerStates);
		// mainLog.println("canReachSpoilerState = " + canReachSpoilerState);
		
		// We are interested in Pmin=1 ( remain U target ),
		// which we obtain by complementing, yielding the set of states
		// where there is no way to avoid remain U target
		BitSet result = BitSetTools.complement(n, canReachSpoilerState);
		// mainLog.println("result = " + result);

		return result;
	}

	/**
	 * Prob1E (Pmax=1) precomputation algorithm (using predecessor relation).
	 * Determines the states of an MDP which, with max probability 1,
	 * reach a state in {@code target}, while remaining in those in {@code remain}.
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param pre predecessor relation
	 * @return the set of states with Pmax=1[ remain U target ]
	 */
	private BitSet prob1e(MDP mdp, BitSet remain, BitSet target, int strat[], PredecessorRelation pre)
	{
		// This algorithm is an adaption of the Smax=1 algorithm
		// in the Principles of Model Checking book

		int n = mdp.getNumStates();

		// Which choices remain enabled?
		ChoicesMask enabledChoices = new ChoicesMask(mdp);

		// We count the remaining, enabled choices in each
		// state so we can easily determine when
		// there are no more enabled choices for a state
		int[] remainingChoices = new int[n];
		for (int s = 0; s < n; s++) {
			remainingChoices[s] = mdp.getNumChoices(s);
		}

		// the set of state that can reach the target, i.e. E[ remain U target ]
		BitSet canReachTarget = pre.calculatePreStar(remain, target, target);
		// the set of state that can't reach the target, i.e. !E[ remain U target ]
		BitSet cantReachTarget = BitSetTools.complement(n, canReachTarget);

		// in addition to the PredecessorRelation, we need more detailed
		// information about the incoming choices for each state
		IncomingChoiceRelation incoming = IncomingChoiceRelation.forModel(this, mdp);

		// in each iteration step, U is the set of states that need to be
		// removed in this iteration because they can't reach the target
		BitSet U = cantReachTarget;
		
		// unknown is the set of states that can still reach the target,
		// but might need to be removed later on
		BitSet unknown = (BitSet) canReachTarget.clone();
		unknown.andNot(target);

		// unsafe are the states that have been determined to have Pmax<1[ remain U target ]
		BitSet unsafe = (BitSet) cantReachTarget.clone();

		int iterations = 0 ;
		while (!U.isEmpty()) {
			iterations++;
			// mainLog.println("Iteration " + iterations +": " +U);

			// states to remove in this iteration step:
			// all states in U (can not reach target anymore)
			// and other states where we have determined that they
			// don't have any choices anymore to remain in unknown
			BitSetAndQueue toRemove = new BitSetAndQueue(U);

			while (!toRemove.isEmpty()) {
				int t = toRemove.dequeue();

				// for each state t, we consider the incoming choices
				for (IncomingChoiceRelation.Choice choice : incoming.getIncomingChoices(t)) {
					// s is the predecessor of t, i.e., P(s,c,t) > 0
					int s = choice.getState();
					int c = choice.getChoice();

					if (!enabledChoices.isEnabled(s,c)) {
						// already disabled
						continue;
					}

					if (!unknown.get(s)) {
						// state already processed
						continue;
					}

					// We disable the choice and decrement the corresponding counter for s
					enabledChoices.disableChoice(s, choice.getChoice());
					remainingChoices[s]--;

					// there are no more remaining choices, remove s
					if (remainingChoices[s] == 0) {
						// s is not in unknown anymore
						unknown.clear(s);
						// s has become unsafe
						unsafe.set(s);
						// and we have to process the removal of s
						toRemove.enqueue(s);
					}
				}
			}

			// after removal, it may be the case that states in unknown and
			// that could previously reach target can not reach target anymore
			// so, we recompute canReachTarget, but now allowing only the
			// enabledChoices that remain enabled
			canReachTarget = incoming.calculatePreStar(unknown, target, target, enabledChoices);

			// we compute the set of states that have become unsafe because they
			// can't reach target anymore:
			// U = unknown states that can not reach target
			U = BitSetTools.minus(unknown, canReachTarget);

			// remove U from unknown
			unknown.andNot(U);
			// mark all U states as unsafe
			unsafe.or(U);

			// do loop once more, now with updated U
		}

		// the result set of states are the states in target
		// and those in unknown, as they have not been removed
		// during the iterations
		BitSet result = BitSetTools.union(unknown, target);
		
		if (strat != null) {
			// we have to generate a strategy for all remaining states

			BitSet done = new BitSet();
			BitSetAndQueue todo = new BitSetAndQueue(target);
			while (!todo.isEmpty()) {
				int t = todo.dequeue();

				if (done.get(t))
					continue;

				for (Choice choice: incoming.getIncomingChoices(t)) {
					int s = choice.getState();
					if (done.get(s)) {
						continue;
					}
					if (target.get(s)) {
						// target states don't need treatment
						continue;
					}

					int ch = choice.getChoice();
					if (!enabledChoices.isEnabled(s, ch)) {
						// choice is not a prob1e choice
						continue;
					}

					// s should be a prob1e \ target state at this point
					assert(unknown.get(s));

					if (strat[s] == -1) {
						// does not have a fixed choice yet
						// fix the choice, as this is the
						// "earliest" moment where s has been discovered,
						// guaranteeing that taking ch will almost surely
						// make progress towards the target
						strat[s] = ch;
						todo.enqueue(s);
					} else {
						// as strategy is fixed, was already
						// enqueued, nothing to do
					}
				}
			}
		}

		return result;
	}


	/**
	 * Compute reachability probabilities using value iteration.
	 * Optionally, store optimal (memoryless) strategy info. 
	 * @param mdp The MDP
	 * @param no Probability 0 states
	 * @param yes Probability 1 states
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten) 
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.  
	 */
	protected ModelCheckerResult computeReachProbsValIter(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[])
			throws PrismException
	{
		IterationMethodPower iterationMethod = new IterationMethodPower(termCrit == TermCrit.ABSOLUTE, termCritParam);
		return doValueIterationReachProbs(mdp, no, yes, min, init, known, iterationMethod, false, strat);
	}

	/**
	 * Compute reachability probabilities using value iteration.
	 * Optionally, store optimal (memoryless) strategy info.
	 * @param mdp The MDP
	 * @param no Probability 0 states
	 * @param yes Probability 1 states
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten)
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param iterationMethod The iteration method
	 * @param topological Do topological value iteration?
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null), 'init' must also be given and is used for the exact values.
	 */
	protected ModelCheckerResult doValueIterationReachProbs(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, IterationMethod iterationMethod, boolean topological, int strat[])
			throws PrismException
	{
		BitSet unknown;
		int i, n;
		double initVal;
		long timer;

		// Start value iteration
		timer = System.currentTimeMillis();
		String description = (min ? "min" : "max")
				+ (topological ? ", topological": "" )
				+ ", with " + iterationMethod.getDescriptionShort();

		mainLog.println("Starting value iteration (" + description + ")...");

		ExportIterations iterationsExport = null;
		if (settings.getBoolean(PrismSettings.PRISM_EXPORT_ITERATIONS)) {
			iterationsExport = new ExportIterations("Explicit MDP ReachProbs value iteration (" + description + ")");
			mainLog.println("Exporting iterations to " + iterationsExport.getFileName());
		}

		// Store num states
		n = mdp.getNumStates();

		// Initialise solution vectors. Use (where available) the following in order of preference:
		// (1) exact answer, if already known; (2) 1.0/0.0 if in yes/no; (3) passed in initial value; (4) initVal
		// where initVal is 0.0 or 1.0, depending on whether we converge from below/above. 
		initVal = (valIterDir == ValIterDir.BELOW) ? 0.0 : 1.0;
		if (init != null) {
			if (known != null) {
				for (i = 0; i < n; i++)
					init[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
			} else {
				for (i = 0; i < n; i++)
					init[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : init[i];
			}
		} else {
			init = new double[n];
			for (i = 0; i < n; i++)
				init[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 : initVal;
		}

		// Determine set of states actually need to compute values for
		unknown = new BitSet();
		unknown.set(0, n);
		unknown.andNot(yes);
		unknown.andNot(no);
		if (known != null)
			unknown.andNot(known);

		if (iterationsExport != null)
			iterationsExport.exportVector(init, 0);

		IterationMethod.IterationValIter iteration = iterationMethod.forMvMultMinMax(mdp, min, strat);
		iteration.init(init);

		IntSet unknownStates = IntSet.asIntSet(unknown);

		if (topological) {
			// Compute SCCInfo, including trivial SCCs in the subgraph obtained when only considering
			// states in unknown
			SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);

			IterationMethod.SingletonSCCSolver singletonSCCSolver = (int s, double[] soln) -> {
				soln[s] = mdp.mvMultJacMinMaxSingle(s, soln, min, strat);
			};

			// run the actual value iteration
			return iterationMethod.doTopologicalValueIteration(this, description, sccs, iteration, singletonSCCSolver, timer, iterationsExport);
		} else {
			// run the actual value iteration
			return iterationMethod.doValueIteration(this, description, iteration, unknownStates, timer, iterationsExport);
		}
	}

	/**
	 * Compute reachability probabilities using interval iteration.
	 * Optionally, store optimal (memoryless) strategy info.
	 * @param mdp The MDP
	 * @param no Probability 0 states
	 * @param yes Probability 1 states
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten)
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param iterationMethod The iteration method
	 * @param topological Do topological value iteration?
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.
	 */
	protected ModelCheckerResult doIntervalIterationReachProbs(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, IterationMethod iterationMethod, boolean topological, int strat[])
			throws PrismException
	{
		BitSet unknown;
		int i, n;
		double initBelow[], initAbove[];
		long timer;

		// Start value iteration
		timer = System.currentTimeMillis();
		String description = (min ? "min" : "max")
				+ (topological ? ", topological": "" )
				+ ", with " + iterationMethod.getDescriptionShort();

		mainLog.println("Starting interval iteration (" + description + ")...");

		ExportIterations iterationsExport = null;
		if (settings.getBoolean(PrismSettings.PRISM_EXPORT_ITERATIONS)) {
			iterationsExport = new ExportIterations("Explicit MDP ReachProbs interval iteration (" + description + ")");
			mainLog.println("Exporting iterations to " + iterationsExport.getFileName());
		}

		// Store num states
		n = mdp.getNumStates();

		// Create solution vector(s)
		initBelow = (init == null) ? new double[n] : init;
		initAbove = new double[n];

		// Initialise solution vectors. Use (where available) the following in order of preference:
		// (1) exact answer, if already known; (2) 1.0/0.0 if in yes/no; (3) initVal
		// where initVal is 0.0 or 1.0, depending on whether we converge from below/above.
		if (known != null && init != null) {
			for (i = 0; i < n; i++) {
				initBelow[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : 0.0;
				initAbove[i] = known.get(i) ? init[i] : yes.get(i) ? 1.0 : no.get(i) ? 0.0 : 1.0;
			}
		} else {
			for (i = 0; i < n; i++) {
				initBelow[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 :  0.0;
				initAbove[i] = yes.get(i) ? 1.0 : no.get(i) ? 0.0 :  1.0;
			}
		}

		// Determine set of states actually need to compute values for
		unknown = new BitSet();
		unknown.set(0, n);
		unknown.andNot(yes);
		unknown.andNot(no);
		if (known != null)
			unknown.andNot(known);

		if (iterationsExport != null) {
			iterationsExport.exportVector(initBelow, 0);
			iterationsExport.exportVector(initAbove, 1);
		}

		OptionsIntervalIteration iiOptions = OptionsIntervalIteration.from(this);

		final boolean enforceMonotonicFromBelow = iiOptions.isEnforceMonotonicityFromBelow();
		final boolean enforceMonotonicFromAbove = iiOptions.isEnforceMonotonicityFromAbove();
		final boolean checkMonotonic = iiOptions.isCheckMonotonicity();

		if (!enforceMonotonicFromAbove) {
			getLog().println("Note: Interval iteration is configured to not enforce monotonicity from above.");
		}
		if (!enforceMonotonicFromBelow) {
			getLog().println("Note: Interval iteration is configured to not enforce monotonicity from below.");
		}

		IterationMethod.IterationIntervalIter below = iterationMethod.forMvMultMinMaxInterval(mdp, min, strat, true, enforceMonotonicFromBelow, checkMonotonic);
		IterationMethod.IterationIntervalIter above = iterationMethod.forMvMultMinMaxInterval(mdp, min, strat, false, enforceMonotonicFromAbove, checkMonotonic);
		below.init(initBelow);
		above.init(initAbove);

		IntSet unknownStates = IntSet.asIntSet(unknown);

		if (topological) {
			// Compute SCCInfo, including trivial SCCs in the subgraph obtained when only considering
			// states in unknown
			SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);

			IterationMethod.SingletonSCCSolver singletonSCCSolver = (int s, double[] soln) -> {
				soln[s] = mdp.mvMultJacMinMaxSingle(s, soln, min, strat);
			};

			// run the actual value iteration
			return iterationMethod.doTopologicalIntervalIteration(this, description, sccs, below, above, singletonSCCSolver, timer, iterationsExport);
		} else {
			// run the actual value iteration
			return iterationMethod.doIntervalIteration(this, description, below, above, unknownStates, timer, iterationsExport);
		}
	}

	/**
	 * Compute reachability probabilities using Gauss-Seidel (including Jacobi-style updates).
	 * @param mdp The MDP
	 * @param no Probability 0 states
	 * @param yes Probability 1 states
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten) 
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.  
	 */
	protected ModelCheckerResult computeReachProbsGaussSeidel(MDP mdp, BitSet no, BitSet yes, boolean min, double init[], BitSet known, int strat[])
			throws PrismException
	{
		IterationMethodGS iterationMethod = new IterationMethodGS(termCrit == TermCrit.ABSOLUTE, termCritParam, false);
		return doValueIterationReachProbs(mdp, no, yes, min, init, known, iterationMethod, false, strat);
	}

	/**
	 * Compute reachability probabilities using policy iteration.
	 * Optionally, store optimal (memoryless) strategy info. 
	 * @param mdp: The MDP
	 * @param no: Probability 0 states
	 * @param yes: Probability 1 states
	 * @param min: Min or max probabilities (true=min, false=max)
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 */
	protected ModelCheckerResult computeReachProbsPolIter(MDP mdp, BitSet no, BitSet yes, boolean min, int strat[]) throws PrismException
	{
		ModelCheckerResult res;
		int i, n, iters, totalIters;
		double soln[], soln2[];
		boolean done;
		long timer;
		DTMCModelChecker mcDTMC;
		DTMC dtmc;

		// Re-use solution to solve each new policy (strategy)?
		boolean reUseSoln = true;

		// Start policy iteration
		timer = System.currentTimeMillis();
		mainLog.println("Starting policy iteration (" + (min ? "min" : "max") + ")...");

		// Create a DTMC model checker (for solving policies)
		mcDTMC = new DTMCModelChecker(this);
		mcDTMC.inheritSettings(this);
		mcDTMC.setLog(new PrismDevNullLog());

		// Store num states
		n = mdp.getNumStates();

		// Create solution vectors
		soln = new double[n];
		soln2 = new double[n];

		// Initialise solution vectors.
		for (i = 0; i < n; i++)
			soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0;

		// If not passed in, create new storage for strategy and initialise
		// Initial strategy just picks first choice (0) everywhere
		if (strat == null) {
			strat = new int[n];
			for (i = 0; i < n; i++)
				strat[i] = 0;
		}
		// Otherwise, just initialise for states not in yes/no
		// (Optimal choices for yes/no should already be known)
		else {
			for (i = 0; i < n; i++)
				if (!(no.get(i) || yes.get(i)))
					strat[i] = 0;
		}

		boolean backwardsGS = (linEqMethod == LinEqMethod.BACKWARDS_GAUSS_SEIDEL);

		// Start iterations
		iters = totalIters = 0;
		done = false;
		while (!done) {
			iters++;
			// Solve induced DTMC for strategy
			dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
			res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, reUseSoln ? soln : null, null, backwardsGS);
			soln = res.soln;
			totalIters += res.numIters;
			// Check if optimal, improve non-optimal choices
			mdp.mvMultMinMax(soln, min, soln2, null, false, null);
			done = true;
			for (i = 0; i < n; i++) {
				// Don't look at no/yes states - we may not have strategy info for them,
				// so they might appear non-optimal
				if (no.get(i) || yes.get(i))
					continue;
				if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
					done = false;
					List<Integer> opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]);
					// Only update strategy if strictly better
					if (!opt.contains(strat[i]))
						strat[i] = opt.get(0);
				}
			}
		}

		// Finished policy iteration
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Policy iteration");
		mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");

		// Return results
		// (Note we don't add the strategy - the one passed in is already there
		// and might have some existing choices stored for other states).
		res = new ModelCheckerResult();
		res.soln = soln;
		res.numIters = totalIters;
		res.timeTaken = timer / 1000.0;
		return res;
	}

	/**
	 * Compute reachability probabilities using modified policy iteration.
	 * @param mdp: The MDP
	 * @param no: Probability 0 states
	 * @param yes: Probability 1 states
	 * @param min: Min or max probabilities (true=min, false=max)
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 */
	protected ModelCheckerResult computeReachProbsModPolIter(MDP mdp, BitSet no, BitSet yes, boolean min, int strat[]) throws PrismException
	{
		ModelCheckerResult res;
		int i, n, iters, totalIters;
		double soln[], soln2[];
		boolean done;
		long timer;
		DTMCModelChecker mcDTMC;
		DTMC dtmc;

		// Start value iteration
		timer = System.currentTimeMillis();
		mainLog.println("Starting modified policy iteration (" + (min ? "min" : "max") + ")...");

		// Create a DTMC model checker (for solving policies)
		mcDTMC = new DTMCModelChecker(this);
		mcDTMC.inheritSettings(this);
		mcDTMC.setLog(new PrismDevNullLog());

		// Limit iters for DTMC solution - this implements "modified" policy iteration
		mcDTMC.setMaxIters(100);
		mcDTMC.setErrorOnNonConverge(false);

		// Store num states
		n = mdp.getNumStates();

		// Create solution vectors
		soln = new double[n];
		soln2 = new double[n];

		// Initialise solution vectors.
		for (i = 0; i < n; i++)
			soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0;

		// If not passed in, create new storage for strategy and initialise
		// Initial strategy just picks first choice (0) everywhere
		if (strat == null) {
			strat = new int[n];
			for (i = 0; i < n; i++)
				strat[i] = 0;
		}
		// Otherwise, just initialise for states not in yes/no
		// (Optimal choices for yes/no should already be known)
		else {
			for (i = 0; i < n; i++)
				if (!(no.get(i) || yes.get(i)))
					strat[i] = 0;
		}

		boolean backwardsGS = (linEqMethod == LinEqMethod.BACKWARDS_GAUSS_SEIDEL);

		// Start iterations
		iters = totalIters = 0;
		done = false;
		while (!done) {
			iters++;
			// Solve induced DTMC for strategy
			dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
			res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, soln, null, backwardsGS);
			soln = res.soln;
			totalIters += res.numIters;
			// Check if optimal, improve non-optimal choices
			mdp.mvMultMinMax(soln, min, soln2, null, false, null);
			done = true;
			for (i = 0; i < n; i++) {
				// Don't look at no/yes states - we don't store strategy info for them,
				// so they might appear non-optimal
				if (no.get(i) || yes.get(i))
					continue;
				if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
					done = false;
					List<Integer> opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]);
					strat[i] = opt.get(0);
				}
			}
		}

		// Finished policy iteration
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Modified policy iteration");
		mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");

		// Return results
		// (Note we don't add the strategy - the one passed in is already there
		// and might have some existing choices stored for other states).
		res = new ModelCheckerResult();
		res.soln = soln;
		res.numIters = totalIters;
		res.timeTaken = timer / 1000.0;
		return res;
	}

	/**
	 * Construct strategy information for min/max reachability probabilities.
	 * (More precisely, list of indices of choices resulting in min/max.)
	 * (Note: indices are guaranteed to be sorted in ascending order.)
	 * @param mdp The MDP
	 * @param state The state to generate strategy info for
	 * @param target The set of target states to reach
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param lastSoln Vector of values from which to recompute in one iteration 
	 */
	public List<Integer> probReachStrategy(MDP mdp, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException
	{
		double val = mdp.mvMultMinMaxSingle(state, lastSoln, min, null);
		return mdp.mvMultMinMaxSingleChoices(state, lastSoln, min, val);
	}

	/**
	 * Compute bounded reachability probabilities.
	 * i.e. compute the min/max probability of reaching a state in {@code target} within k steps.
	 * @param mdp The MDP
	 * @param target Target states
	 * @param k Bound
	 * @param min Min or max probabilities (true=min, false=max)
	 */
	public ModelCheckerResult computeBoundedReachProbs(MDP mdp, BitSet target, int k, boolean min) throws PrismException
	{
		return computeBoundedReachProbs(mdp, null, target, k, min, null, null);
	}

	/**
	 * Compute bounded until probabilities.
	 * i.e. compute the min/max probability of reaching a state in {@code target},
	 * within k steps, and while remaining in states in {@code remain}.
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param k Bound
	 * @param min Min or max probabilities (true=min, false=max)
	 */
	public ModelCheckerResult computeBoundedUntilProbs(MDP mdp, BitSet remain, BitSet target, int k, boolean min) throws PrismException
	{
		return computeBoundedReachProbs(mdp, remain, target, k, min, null, null);
	}

	/**
	 * Compute bounded reachability/until probabilities.
	 * i.e. compute the min/max probability of reaching a state in {@code target},
	 * within k steps, and while remaining in states in {@code remain}.
	 * @param mdp The MDP
	 * @param remain Remain in these states (optional: null means "all")
	 * @param target Target states
	 * @param k Bound
	 * @param min Min or max probabilities (true=min, false=max)
	 * @param init Optionally, an initial solution vector (may be overwritten) 
	 * @param results Optional array of size k+1 to store (init state) results for each step (null if unused)
	 */
	public ModelCheckerResult computeBoundedReachProbs(MDP mdp, BitSet remain, BitSet target, int k, boolean min, double init[], double results[])
			throws PrismException
	{
		ModelCheckerResult res = null;
		BitSet unknown;
		int i, n, iters;
		double soln[], soln2[], tmpsoln[];
		long timer;

		// Start bounded probabilistic reachability
		timer = System.currentTimeMillis();
		mainLog.println("\nStarting bounded probabilistic reachability (" + (min ? "min" : "max") + ")...");

		// Store num states
		n = mdp.getNumStates();

		// Create solution vector(s)
		soln = new double[n];
		soln2 = (init == null) ? new double[n] : init;

		// Initialise solution vectors. Use passed in initial vector, if present
		if (init != null) {
			for (i = 0; i < n; i++)
				soln[i] = soln2[i] = target.get(i) ? 1.0 : init[i];
		} else {
			for (i = 0; i < n; i++)
				soln[i] = soln2[i] = target.get(i) ? 1.0 : 0.0;
		}
		// Store intermediate results if required
		// (compute min/max value over initial states for first step)
		if (results != null) {
			// TODO: whether this is min or max should be specified somehow
			results[0] = Utils.minMaxOverArraySubset(soln2, mdp.getInitialStates(), true);
		}

		// Determine set of states actually need to perform computation for
		unknown = new BitSet();
		unknown.set(0, n);
		unknown.andNot(target);
		if (remain != null)
			unknown.and(remain);

		// Start iterations
		iters = 0;
		while (iters < k) {
			iters++;
			// Matrix-vector multiply and min/max ops
			mdp.mvMultMinMax(soln, min, soln2, unknown, false, null);
			// Store intermediate results if required
			// (compute min/max value over initial states for this step)
			if (results != null) {
				// TODO: whether this is min or max should be specified somehow
				results[iters] = Utils.minMaxOverArraySubset(soln2, mdp.getInitialStates(), true);
			}
			// Swap vectors for next iter
			tmpsoln = soln;
			soln = soln2;
			soln2 = tmpsoln;
		}

		// Finished bounded probabilistic reachability
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Bounded probabilistic reachability (" + (min ? "min" : "max") + ")");
		mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");

		// Return results
		res = new ModelCheckerResult();
		res.soln = soln;
		res.lastSoln = soln2;
		res.numIters = iters;
		res.timeTaken = timer / 1000.0;
		res.timePre = 0.0;
		return res;
	}

	/**
	 * Compute expected cumulative (step-bounded) rewards.
	 * i.e. compute the min/max reward accumulated within {@code k} steps.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param min Min or max rewards (true=min, false=max)
	 */
	public ModelCheckerResult computeCumulativeRewards(MDP mdp, MDPRewards mdpRewards, int k, boolean min) throws PrismException
	{
		ModelCheckerResult res = null;
		int i, n, iters;
		long timer;
		double soln[], soln2[], tmpsoln[];

		// Start expected cumulative reward
		timer = System.currentTimeMillis();
		mainLog.println("\nStarting expected cumulative reward (" + (min ? "min" : "max") + ")...");

		// Store num states
		n = mdp.getNumStates();

		// Create/initialise solution vector(s)
		soln = new double[n];
		soln2 = new double[n];
		for (i = 0; i < n; i++)
			soln[i] = soln2[i] = 0.0;

		// Start iterations
		iters = 0;
		while (iters < k) {
			iters++;
			// Matrix-vector multiply and min/max ops
			mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, null);
			// Swap vectors for next iter
			tmpsoln = soln;
			soln = soln2;
			soln2 = tmpsoln;
		}

		// Finished value iteration
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Expected cumulative reward (" + (min ? "min" : "max") + ")");
		mainLog.println(" took " + iters + " iterations and " + timer / 1000.0 + " seconds.");

		// Return results
		res = new ModelCheckerResult();
		res.soln = soln;
		res.numIters = iters;
		res.timeTaken = timer / 1000.0;

		return res;
	}

	/**
	 * Compute upper bound for maximum expected reward, with the method specified in the settings.
	 * @param mdp the model
	 * @param mdpRewards the rewards
	 * @param target the target states
	 * @param unknown the states that are not target or infinity states
	 * @param inf the infinite states
	 * @return upper bound on Rmax=?[ F target ] for all states
	 */
	double computeReachRewardsMaxUpperBound(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
	{
		if (unknown.isEmpty()) {
			mainLog.println("Skipping upper bound computation, no unknown states...");
			return 0;
		}

		// inf and target states become trap states (with dropped choices)
		BitSet trapStates = (BitSet) target.clone();
		trapStates.or(inf);
		MDP cleanedMDP = new MDPDroppedAllChoices(mdp, trapStates);

		OptionsIntervalIteration iiOptions = OptionsIntervalIteration.from(this);

		double upperBound = 0.0;
		String method = null;
		switch (iiOptions.getBoundMethod()) {
		case VARIANT_1_COARSE:
			upperBound = computeReachRewardsMaxUpperBoundVariant1Coarse(cleanedMDP, mdpRewards, target, unknown, inf);
			method = "variant 1, coarse";
			break;
		case VARIANT_1_FINE:
			upperBound = computeReachRewardsMaxUpperBoundVariant1Fine(cleanedMDP, mdpRewards, target, unknown, inf);
			method = "variant 1, fine";
			break;
		case DEFAULT:
		case VARIANT_2:
			upperBound = computeReachRewardsMaxUpperBoundVariant2(cleanedMDP, mdpRewards, target, unknown, inf);
			method = "variant 2";
			break;
		case DSMPI:
			throw new PrismNotSupportedException("Dijkstra Sweep MPI upper bound heuristic can not be used for Rmax");
		}

		if (method == null) {
			throw new PrismException("Unknown upper bound heuristic");
		}

		mainLog.println("Upper bound for max expectation (" + method + "): " + upperBound);
		return upperBound;
	}

	/**
	 * Compute upper bound for minimum expected reward, with the method specified in the settings.
	 * @param mdp the model
	 * @param mdpRewards the rewards
	 * @param target the target states
	 * @param unknown the states that are not target or infinity states
	 * @param inf the infinite states
	 * @return upper bound on Rmin=?[ F target ] for all unknown states
	 */
	double computeReachRewardsMinUpperBound(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
	{
		// inf and target states become trap states (with dropped choices)
		BitSet trapStates = (BitSet) target.clone();
		trapStates.or(inf);
		MDP cleanedMDP = new MDPDroppedAllChoices(mdp, trapStates);

		OptionsIntervalIteration iiOptions = OptionsIntervalIteration.from(this);

		double upperBound = 0.0;
		String method = null;
		switch (iiOptions.getBoundMethod()) {
		case DEFAULT:
		case DSMPI:
			upperBound = DijkstraSweepMPI.computeUpperBound(this, mdp, mdpRewards, target, unknown);
			method = "Dijkstra Sweep MPI";
			break;
		case VARIANT_1_COARSE:
			upperBound = computeReachRewardsMaxUpperBoundVariant1Coarse(cleanedMDP, mdpRewards, target, unknown, inf);
			method = "using Rmax upper bound via variant 1, coarse";
			break;
		case VARIANT_1_FINE:
			upperBound = computeReachRewardsMaxUpperBoundVariant1Fine(cleanedMDP, mdpRewards, target, unknown, inf);
			method = "using Rmax upper bound via variant 1, fine";
			break;
		case VARIANT_2:
			upperBound = computeReachRewardsMaxUpperBoundVariant2(cleanedMDP, mdpRewards, target, unknown, inf);
			method = "using Rmax upper bound via variant 2";
			break;
		}

		if (method == null) {
			throw new PrismException("Unknown upper bound heuristic");
		}

		mainLog.println("Upper bound for min expectation (" + method + "): " + upperBound);
		return upperBound;
	}

	/**
	 * Return true if the MDP is contracting for all states in the 'unknown'
	 * set, i.e., if Pmin=1( unknown U target) holds.
	 */
	private boolean isContracting(MDP mdp, BitSet unknown, BitSet target)
	{
		// compute Pmin=1( unknown U target )
		BitSet pmin1 = prob1(mdp, unknown, target, true, null);
		BitSet tmp = (BitSet) unknown.clone();
		tmp.andNot(pmin1);
		if (!tmp.isEmpty()) {
			// unknown is not contained in pmin1, not contracting
			return false;
		}
		return true;
	}

	/**
	 * Compute upper bound for maximum expected reward (variant 1, coarse),
	 * i.e., does not compute separate q_t / p_t per SCC.
	 * Uses Rs = S, i.e., does not take reachability into account.
	 * @param mdp the model
	 * @param mdpRewards the rewards
	 * @param target the target states
	 * @param unknown the states that are not target or infinity states
	 * @return upper bound on Rmax=?[ F target ] for all states
	 */
	double computeReachRewardsMaxUpperBoundVariant1Coarse(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
	{
		double[] boundsOnExpectedVisits = new double[mdp.getNumStates()];
		double[] maxRews = new double[mdp.getNumStates()];
		int[] Ct = new int[mdp.getNumStates()];

		StopWatch timer = new StopWatch(getLog());
		timer.start("computing an upper bound for maximal expected reward");

		SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, null);
		BitSet trivial = new BitSet();

		double q = 0;
		for (int scc = 0, numSCCs = sccs.getNumSCCs(); scc < numSCCs; scc++) {
			IntSet statesForSCC = sccs.getStatesForSCC(scc);

			int cardinality = statesForSCC.cardinality();

			PrimitiveIterator.OfInt itSCC = statesForSCC.iterator();
			while (itSCC.hasNext()) {
				int s = itSCC.nextInt();
				Ct[s] = cardinality;

				boolean hasSelfloop = false;
				for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {
					double probRemain = 0;
					boolean allRemain = true;  // all successors remain in the SCC?
					for (Iterator<Entry<Integer, Double>> it = mdp.getTransitionsIterator(s, ch); it.hasNext(); ) {
						Entry<Integer, Double> t = it.next();
						if (statesForSCC.get(t.getKey())) {
							probRemain += t.getValue();
							hasSelfloop = true;
						} else {
							allRemain = false;
						}
					}

					if (!allRemain) { // action in the set X
						q = Math.max(q, probRemain);
					}
				}

				if (cardinality == 1 && !hasSelfloop) {
					trivial.set(s);
				}
			}
		}

		double p = 1;
		for (int s = 0; s < mdp.getNumStates(); s++) {
			double maxRew = 0;
			for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {
				for (Iterator<Entry<Integer, Double>> it = mdp.getTransitionsIterator(s, ch); it.hasNext(); ) {
					Entry<Integer, Double> t = it.next();
					p = Math.min(p, t.getValue());

					double rew = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, ch);
					maxRew = Math.max(maxRew, rew);
				}
			}
			maxRews[s] = maxRew;
		}

		double upperBound = 0;
		for (int s = 0; s < mdp.getNumStates(); s++) {
			if (target.get(s) || inf.get(s)) {
				// inf or target states: not relevant, set visits to 0, ignore in summation
				boundsOnExpectedVisits[s] = 0.0;
			} else if (unknown.get(s)) {
				if (trivial.get(s)) {
					// s is a trivial SCC: seen at most once
					boundsOnExpectedVisits[s] = 1.0;
				} else {
					boundsOnExpectedVisits[s] = 1 / (Math.pow(p, Ct[s]-1) * (1.0-q));
				}
				upperBound += boundsOnExpectedVisits[s] * maxRews[s];
			}
		}

		if (OptionsIntervalIteration.from(this).isBoundComputationVerbose()) {
			mainLog.println("Upper bound for max expectation computation (variant 1, coarse):");
			mainLog.println("p = " + p);
			mainLog.println("q = " + q);
			mainLog.println("|Ct| = " + Arrays.toString(Ct));
			mainLog.println("ζ* = " + Arrays.toString(boundsOnExpectedVisits));
			mainLog.println("maxRews = " + Arrays.toString(maxRews));
		}

		timer.stop();
		// mainLog.println("Upper bound for max expectation (variant 1, coarse): " + upperBound);

		if (!Double.isFinite(upperBound)) {
			throw new PrismException("Problem computing an upper bound for the expectation, did not get finite result");
		}

		return upperBound;
	}

	/**
	 * Compute upper bound for maximum expected reward (variant 1, fine).
	 * i.e., does compute separate q_t / p_t per SCC.
	 * Uses Rs = S, i.e., does not take reachability into account.
	 * @param mdp the model
	 * @param mdpRewards the rewards
	 * @param target the target states
	 * @param unknown the states that are not target or infinity states
	 * @return upper bound on Rmax=?[ F target ] for all states
	 */
	double computeReachRewardsMaxUpperBoundVariant1Fine(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
	{
		double[] boundsOnExpectedVisits = new double[mdp.getNumStates()];
		double[] qt = new double[mdp.getNumStates()];
		double[] pt = new double[mdp.getNumStates()];
		double[] maxRews = new double[mdp.getNumStates()];
		int[] Ct = new int[mdp.getNumStates()];

		StopWatch timer = new StopWatch(getLog());
		timer.start("computing an upper bound for maximal expected reward");

		SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, null);
		BitSet trivial = new BitSet();

		for (int scc = 0, numSCCs = sccs.getNumSCCs(); scc < numSCCs; scc++) {
			IntSet statesForSCC = sccs.getStatesForSCC(scc);

			double q = 0;
			double p = 1;

			int cardinality = statesForSCC.cardinality();

			PrimitiveIterator.OfInt itSCC = statesForSCC.iterator();
			while (itSCC.hasNext()) {
				int s = itSCC.nextInt();

				Ct[s] = cardinality;
				boolean hasSelfloop = false;

				for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {

					double probRemain = 0;
					boolean allRemain = true;  // all successors remain in the SCC?
					for (Iterator<Entry<Integer, Double>> it = mdp.getTransitionsIterator(s, ch); it.hasNext(); ) {
						Entry<Integer, Double> t = it.next();
						if (statesForSCC.get(t.getKey())) {
							probRemain += t.getValue();
							p = Math.min(p, t.getValue());
							hasSelfloop = true;
						} else {
							allRemain = false;
						}
					}

					if (!allRemain) { // action in the set Xt
						q = Math.max(q, probRemain);
					}
				}

				if (cardinality == 1 && !hasSelfloop) {
					trivial.set(s);
				}
			}

			for (int s : statesForSCC) {
				qt[s] = q;
				pt[s] = p;
			}
		}

		for (int s = 0; s < mdp.getNumStates(); s++) {
			double maxRew = 0;
			for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {
				double rew = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, ch);
				maxRew = Math.max(maxRew, rew);
			}
			maxRews[s] = maxRew;
		}

		double upperBound = 0;
		for (int s = 0; s < mdp.getNumStates(); s++) {
			if (target.get(s) || inf.get(s)) {
				// inf or target states: not relevant, set visits to 0, ignore in summation
				boundsOnExpectedVisits[s] = 0.0;
			} else if (unknown.get(s)) {
				if (trivial.get(s)) {
					// s is a trivial SCC: seen at most once
					boundsOnExpectedVisits[s] = 1.0;
				} else {
					boundsOnExpectedVisits[s] = 1 / (Math.pow(pt[s], Ct[s]-1) * (1.0-qt[s]));
				}
				upperBound += boundsOnExpectedVisits[s] * maxRews[s];
			}
		}

		timer.stop();

		if (OptionsIntervalIteration.from(this).isBoundComputationVerbose()) {
			mainLog.println("Upper bound for max expectation computation (variant 1, fine):");
			mainLog.println("pt = " + Arrays.toString(pt));
			mainLog.println("qt = " + Arrays.toString(qt));
			mainLog.println("|Ct| = " + Arrays.toString(Ct));
			mainLog.println("ζ* = " + Arrays.toString(boundsOnExpectedVisits));
			mainLog.println("maxRews = " + Arrays.toString(maxRews));
		}

		// mainLog.println("Upper bound for max expectation (variant 1, fine): " + upperBound);

		if (!Double.isFinite(upperBound)) {
			throw new PrismException("Problem computing an upper bound for the expectation, did not get finite result");
		}

		return upperBound;
	}

	/**
	 * Compute upper bound for maximum expected reward (variant 2).
	 * Uses Rs = S, i.e., does not take reachability into account.
	 * @param dtmc the model
	 * @param mcRewards the rewards
	 * @param target the target states
	 * @param unknown the states that are not target or infinity states
	 * @param inf the infinity states
	 * @return upper bound on R=?[ F target ] for all states
	 */
	double computeReachRewardsMaxUpperBoundVariant2(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet unknown, BitSet inf) throws PrismException
	{
		double[] dt = new double[mdp.getNumStates()];
		double[] boundsOnExpectedVisits = new double[mdp.getNumStates()];
		double[] maxRews = new double[mdp.getNumStates()];

		StopWatch timer = new StopWatch(getLog());
		timer.start("computing an upper bound for expected reward");

		SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);

		BitSet T = (BitSet) target.clone();

		@SuppressWarnings("unused")
		int i = 0;
		while (true) {
			BitSet Si = new BitSet();
			i++;

			// TODO: might be inefficient, worst-case quadratic runtime...
			for (PrimitiveIterator.OfInt it = IterableBitSet.getClearBits(T, mdp.getNumStates() -1 ).iterator(); it.hasNext(); ) {
				int s = it.nextInt();
				// mainLog.println("Check " + s + " against " + T);
				boolean allActionsReachT = true;
				for (int choice = 0, choices = mdp.getNumChoices(s); choice < choices; choice++) {
					if (!mdp.someSuccessorsInSet(s, choice, T)) {
						allActionsReachT = false;
						break;
					}
				}
				if (allActionsReachT) {
					Si.set(s);
				}
			}

			if (Si.isEmpty()) {
				break;
			}

			// mainLog.println("S" + i + " = " + Si);
			// mainLog.println("T = " + T);

			for (PrimitiveIterator.OfInt it = IterableBitSet.getSetBits(Si).iterator(); it.hasNext(); ) {
				final int t = it.nextInt();
				final int sccIndexForT = sccs.getSCCIndex(t);

				double min = Double.POSITIVE_INFINITY;
				for (int choice = 0, choices = mdp.getNumChoices(t); choice < choices; choice++) {
					// mainLog.println("State " + t + ", choice = " + choice);
					double d = mdp.sumOverTransitions(t, choice, (int __, int u, double prob) -> {
						// mainLog.println("t = " + t + ", u = " + u + ", prob = " + prob);
						if (!T.get(u))
							return 0.0;

						boolean inSameSCC = (sccs.getSCCIndex(u) == sccIndexForT);
						double d_u_t = inSameSCC ? dt[u] : 1.0;
						// mainLog.println("d_u_t = " + d_u_t);
						return d_u_t * prob;
					});
					if (d < min) {
						min = d;
					}
				}
				dt[t] = min;
				// mainLog.println("d["+t+"] = " + dt[t]);
			}

			T.or(Si);
		}

		for (int s = 0; s < mdp.getNumStates(); s++) {
			double maxRew = 0;
			for (int ch = 0; ch < mdp.getNumChoices(s); ch++) {
				double rew = mdpRewards.getStateReward(s) + mdpRewards.getTransitionReward(s, ch);
				maxRew = Math.max(maxRew, rew);
			}
			maxRews[s] = maxRew;
		}

		double upperBound = 0;
		for (PrimitiveIterator.OfInt it = IterableBitSet.getSetBits(unknown).iterator(); it.hasNext();) {
			int s = it.nextInt();
			boundsOnExpectedVisits[s] = 1 / dt[s];
			upperBound += boundsOnExpectedVisits[s] * maxRews[s];
		}

		timer.stop();

		if (OptionsIntervalIteration.from(this).isBoundComputationVerbose()) {
			mainLog.println("Upper bound for max expectation computation (variant 2):");
			mainLog.println("d_t = " + Arrays.toString(dt));
			mainLog.println("ζ* = " + Arrays.toString(boundsOnExpectedVisits));
		}

		// mainLog.println("Upper bound for expectation (variant 2): " + upperBound);

		if (!Double.isFinite(upperBound)) {
			throw new PrismException("Problem computing an upper bound for the expectation, did not get finite result");
		}

		return upperBound;
	}

	/**
	 * Compute expected instantaneous reward,
	 * i.e. compute the min/max expected reward of the states after {@code k} steps.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param k the number of steps
	 * @param min Min or max rewards (true=min, false=max)
	 */
	public ModelCheckerResult computeInstantaneousRewards(MDP mdp, MDPRewards mdpRewards, final int k, boolean min)
	{
		ModelCheckerResult res = null;
		int i, n, iters;
		double soln[], soln2[], tmpsoln[];
		long timer;

		// Store num states
		n = mdp.getNumStates();

		// Start backwards transient computation
		timer = System.currentTimeMillis();
		mainLog.println("\nStarting backwards instantaneous rewards computation...");

		// Create solution vector(s)
		soln = new double[n];
		soln2 = new double[n];

		// Initialise solution vectors.
		for (i = 0; i < n; i++)
			soln[i] = mdpRewards.getStateReward(i);

		// Start iterations
		for (iters = 0; iters < k; iters++) {
			// Matrix-vector multiply
			mdp.mvMultMinMax(soln, min, soln2, null, false, null);
			// Swap vectors for next iter
			tmpsoln = soln;
			soln = soln2;
			soln2 = tmpsoln;
		}

		// Finished backwards transient computation
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Backwards transient instantaneous rewards computation");
		mainLog.println(" took " + iters + " iters and " + timer / 1000.0 + " seconds.");

		// Return results
		res = new ModelCheckerResult();
		res.soln = soln;
		res.lastSoln = soln2;
		res.numIters = iters;
		res.timeTaken = timer / 1000.0;
		res.timePre = 0.0;
		return res;
	}

	/**
	 * Compute total expected rewards.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param min Min or max rewards (true=min, false=max)
	 */
	public ModelCheckerResult computeTotalRewards(MDP mdp, MDPRewards mdpRewards, boolean min) throws PrismException
	{
		if (min) {
			throw new PrismNotSupportedException("Minimum total expected reward not supported in explicit engine");
		} else {
			// max. We don't know if there are positive ECs, so we can't skip precomputation
			return computeTotalRewardsMax(mdp, mdpRewards, false);
		}
	}

	/**
	 * Compute maximal total expected rewards.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param noPositiveECs if true, there are no positive ECs, i.e., all states have finite values (skip precomputation)
	 */
	public ModelCheckerResult computeTotalRewardsMax(MDP mdp, MDPRewards mdpRewards, boolean noPositiveECs) throws PrismException
	{
		ModelCheckerResult res = null;
		int n;
		long timer;
		BitSet inf;

		// Local copy of setting
		MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;

		// Switch to a supported method, if necessary
		if (!(mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION || mdpSolnMethod == MDPSolnMethod.GAUSS_SEIDEL || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION)) {
			mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL;
			mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\"");
		}
		if (getDoIntervalIteration()) {
			throw new PrismNotSupportedException("Interval iteration for total rewards is currently not supported");
		}

		// Start expected total reward
		timer = System.currentTimeMillis();
		mainLog.println("\nStarting total expected reward (max)...");

		// Store num states
		n = mdp.getNumStates();

		long timerPre;

		if (noPositiveECs) {
			// no inf states
			inf = new BitSet();
			timerPre = 0;
		} else {
			mainLog.println("Precomputation: Find positive end components...");

			timerPre = System.currentTimeMillis();

			ECComputer ecs = ECComputer.createECComputer(this, mdp);
			ecs.computeMECStates();
			BitSet positiveECs = new BitSet();
			for (BitSet ec : ecs.getMECStates()) {
				// check if this MEC is positive
				boolean positiveEC = false;
				for (int state : new IterableStateSet(ec, n)) {
					if (mdpRewards.getStateReward(state) > 0) {
						// state with positive reward in this MEC
						positiveEC = true;
						break;
					}
					for (int choice = 0, numChoices = mdp.getNumChoices(state); choice < numChoices; choice++) {
						if (mdpRewards.getTransitionReward(state, choice) > 0 &&
								mdp.allSuccessorsInSet(state, choice, ec)) {
							// choice from this state with positive reward back into this MEC
							positiveEC = true;
							break;
						}
					}
				}
				if (positiveEC) {
					positiveECs.or(ec);
				}
			}

			// inf = Pmax[ <> positiveECs ] > 0
			//     = ! (Pmax[ <> positiveECs ] = 0)
			inf = prob0(mdp, null, positiveECs, false, null);  // Pmax[ <> positiveECs ] = 0
			inf.flip(0,n);  // !(Pmax[ <> positive ECs ] = 0) = Pmax[ <> positiveECs ] > 0

			timerPre = System.currentTimeMillis() - timerPre;
			mainLog.println("Precomputation took " + timerPre / 1000.0 + " seconds, " + inf.cardinality() + " infinite states, " + (n - inf.cardinality()) + " states remaining.");
		}

		// Compute rewards
		// do standard max reward calculation, but with empty target set
		switch (mdpSolnMethod) {
		case VALUE_ITERATION:
			res = computeReachRewardsValIter(mdp, mdpRewards, new BitSet(), inf, false, null, null, null);
			break;
		case GAUSS_SEIDEL:
			res = computeReachRewardsGaussSeidel(mdp, mdpRewards, new BitSet(), inf, false, null, null, null);
			break;
		case POLICY_ITERATION:
			res = computeReachRewardsPolIter(mdp, mdpRewards, new BitSet(), inf, false, null);
			break;
		default:
			throw new PrismException("Unknown MDP solution method " + mdpSolnMethod.fullName());
		}

		// Finished expected total reward
		timer = System.currentTimeMillis() - timer;
		mainLog.println("Expected total reward took " + timer / 1000.0 + " seconds.");

		// Update time taken
		res.timeTaken = timer / 1000.0;
		res.timePre = timerPre / 1000.0;

		// Return results
		return res;
	}


	/**
	 * Compute expected reachability rewards.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param min Min or max rewards (true=min, false=max)
	 */
	public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min) throws PrismException
	{
		return computeReachRewards(mdp, mdpRewards, target, min, null, null);
	}

	/**
	 * Compute expected reachability rewards.
	 * i.e. compute the min/max reward accumulated to reach a state in {@code target}.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param min Min or max rewards (true=min, false=max)
	 * @param init Optionally, an initial solution vector (may be overwritten) 
	 * @param known Optionally, a set of states for which the exact answer is known
	 * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values).  
	 * Also, 'known' values cannot be passed for some solution methods, e.g. policy iteration.  
	 */
	public ModelCheckerResult computeReachRewards(MDP mdp, MDPRewards mdpRewards, BitSet target, boolean min, double init[], BitSet known)
			throws PrismException
	{
		ModelCheckerResult res = null;
		BitSet inf;
		int n, numTarget, numInf;
		long timer, timerProb1;
		int strat[] = null;
		// Local copy of setting
		MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;

		// Switch to a supported method, if necessary
		if (!(mdpSolnMethod == MDPSolnMethod.VALUE_ITERATION || mdpSolnMethod == MDPSolnMethod.GAUSS_SEIDEL || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION)) {
			mdpSolnMethod = MDPSolnMethod.GAUSS_SEIDEL;
			mainLog.printWarning("Switching to MDP solution method \"" + mdpSolnMethod.fullName() + "\"");
		}

		// Check for some unsupported combinations
		if (mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) {
			if (known != null) {
				throw new PrismException("Policy iteration methods cannot be passed 'known' values for some states");
			}
		}
		if (doIntervalIteration) {
			if (mdpSolnMethod != MDPSolnMethod.VALUE_ITERATION && mdpSolnMethod != MDPSolnMethod.GAUSS_SEIDEL) {
				throw new PrismNotSupportedException("Currently, explicit engine only supports interval iteration with value iteration or Gauss-Seidel for MDPs");
			}
		}

		// Start expected reachability
		timer = System.currentTimeMillis();
		mainLog.println("\nStarting expected reachability (" + (min ? "min" : "max") + ")...");

		// Check for deadlocks in non-target state (because breaks e.g. prob1)
		mdp.checkForDeadlocks(target);

		// Store num states
		n = mdp.getNumStates();
		// Optimise by enlarging target set (if more info is available)
		if (init != null && known != null && !known.isEmpty()) {
			BitSet targetNew = (BitSet) target.clone();
			for (int i : new IterableBitSet(known)) {
				if (init[i] == 1.0) {
					targetNew.set(i);
				}
			}
			target = targetNew;
		}

		// If required, export info about target states 
		if (getExportTarget()) {
			BitSet bsInit = new BitSet(n);
			for (int i = 0; i < n; i++) {
				bsInit.set(i, mdp.isInitialState(i));
			}
			List<BitSet> labels = Arrays.asList(bsInit, target);
			List<String> labelNames = Arrays.asList("init", "target");
			mainLog.println("\nExporting target states info to file \"" + getExportTargetFilename() + "\"...");
			exportLabels(mdp, labels, labelNames, Prism.EXPORT_PLAIN, new PrismFileLog(getExportTargetFilename()));
		}

		// If required, create/initialise strategy storage
		// Set choices to -1, denoting unknown
		// (except for target states, which are -2, denoting arbitrary)
		if (genStrat || exportAdv || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) {
			strat = new int[n];
			for (int i = 0; i < n; i++) {
				strat[i] = target.get(i) ? -2 : -1;
			}
		}
		
		// Precomputation (not optional)
		timerProb1 = System.currentTimeMillis();
		inf = prob1(mdp, null, target, !min, strat);
		inf.flip(0, n);
		timerProb1 = System.currentTimeMillis() - timerProb1;
		
		// Print results of precomputation
		numTarget = target.cardinality();
		numInf = inf.cardinality();
		mainLog.println("target=" + numTarget + ", inf=" + numInf + ", rest=" + (n - (numTarget + numInf)));

		// If required, generate strategy for "inf" states.
		if (genStrat || exportAdv || mdpSolnMethod == MDPSolnMethod.POLICY_ITERATION) {
			if (min) {
				// If min reward is infinite, all choices give infinity
				// So the choice can be arbitrary, denoted by -2; 
				for (int i = inf.nextSetBit(0); i >= 0; i = inf.nextSetBit(i + 1)) {
					strat[i] = -2;
				}
			} else {
				// If max reward is infinite, there is at least one choice giving infinity.
				// So we pick, for all "inf" states, the first choice for which some transitions stays in "inf".
				for (int i = inf.nextSetBit(0); i >= 0; i = inf.nextSetBit(i + 1)) {
					int numChoices = mdp.getNumChoices(i);
					for (int k = 0; k < numChoices; k++) {
						if (mdp.someSuccessorsInSet(i, k, inf)) {
							strat[i] = k;
							continue;
						}
					}
				}
			}
		}

		ZeroRewardECQuotient quotient = null;
		boolean doZeroMECCheckForMin = true;
		if (min & doZeroMECCheckForMin) {
			StopWatch zeroMECTimer = new StopWatch(mainLog);
			zeroMECTimer.start("checking for zero-reward ECs");
			mainLog.println("For Rmin, checking for zero-reward ECs...");
			BitSet unknown = (BitSet) inf.clone();
			unknown.flip(0, mdp.getNumStates());
			unknown.andNot(target);
			quotient = ZeroRewardECQuotient.getQuotient(this, mdp, unknown, mdpRewards);

			if (quotient == null) {
				zeroMECTimer.stop("no zero-reward ECs found, proceeding normally");
			} else {
				zeroMECTimer.stop("built quotient MDP with " + quotient.getNumberOfZeroRewardMECs() + " zero-reward MECs");
				if (strat != null) {
					throw new PrismException("Constructing a strategy for Rmin in the presence of zero-reward ECs is currently not supported");
				}
			}
		}

		if (quotient != null) {
			BitSet newInfStates = (BitSet)inf.clone();
			newInfStates.or(quotient.getNonRepresentativeStates());
			int quotientModelStates = quotient.getModel().getNumStates() - newInfStates.cardinality();
			mainLog.println("Computing Rmin in zero-reward EC quotient model (" + quotientModelStates + " relevant states)...");
			res = computeReachRewardsNumeric(quotient.getModel(), quotient.getRewards(), mdpSolnMethod, target, newInfStates, min, init, known, strat);
			quotient.mapResults(res.soln);
		} else {
			res = computeReachRewardsNumeric(mdp, mdpRewards, mdpSolnMethod, target, inf, min, init, known, strat);
		}

		// Store strategy
		if (genStrat) {
			res.strat = new MDStrategyArray(mdp, strat);
		}
		// Export adversary
		if (exportAdv) {
			// Prune strategy, if needed
			if (getRestrictStratToReach()) {
				restrictStrategyToReachableStates(mdp, strat);
			}
			// Export
			PrismLog out = new PrismFileLog(exportAdvFilename);
			new DTMCFromMDPMemorylessAdversary(mdp, strat).exportToPrismExplicitTra(out);
			out.close();
		}

		// Finished expected reachability
		timer = System.currentTimeMillis() - timer;
		mainLog.println("Expected reachability took " + timer / 1000.0 + " seconds.");

		// Update time taken
		res.timeTaken = timer / 1000.0;
		res.timePre = timerProb1 / 1000.0;

		return res;
	}

	protected ModelCheckerResult computeReachRewardsNumeric(MDP mdp, MDPRewards mdpRewards, MDPSolnMethod method, BitSet target, BitSet inf, boolean min, double init[], BitSet known, int strat[]) throws PrismException
	{
		ModelCheckerResult res = null;

		IterationMethod iterationMethod = null;
		switch (method) {
		case VALUE_ITERATION:
			iterationMethod = new IterationMethodPower(termCrit == TermCrit.ABSOLUTE, termCritParam);
			break;
		case GAUSS_SEIDEL:
			iterationMethod = new IterationMethodGS(termCrit == TermCrit.ABSOLUTE, termCritParam, false);
			break;
		case POLICY_ITERATION:
			if (doIntervalIteration) {
				throw new PrismNotSupportedException("Interval iteration currently not supported for policy iteration");
			}
			res = computeReachRewardsPolIter(mdp, mdpRewards, target, inf, min, strat);
			break;
		default:
			throw new PrismException("Unknown MDP solution method " + method.fullName());
		}

		if (res == null) { // not yet computed, use iterationMethod
			if (!doIntervalIteration) {
				res = doValueIterationReachRewards(mdp, mdpRewards, iterationMethod, target, inf, min, init, known, getDoTopologicalValueIteration(), strat);
			} else {
				res = doIntervalIterationReachRewards(mdp, mdpRewards, iterationMethod, target, inf, min, init, known, getDoTopologicalValueIteration(), strat);
			}
		}

		return res;
	}

	/**
	 * Compute expected reachability rewards using value iteration.
	 * Optionally, store optimal (memoryless) strategy info. 
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param inf States for which reward is infinite
	 * @param min Min or max rewards (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten) 
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.
	 */
	protected ModelCheckerResult computeReachRewardsValIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[], BitSet known, int strat[])
			throws PrismException
	{
		IterationMethodPower iterationMethod = new IterationMethodPower(termCrit == TermCrit.ABSOLUTE, termCritParam);
		return doValueIterationReachRewards(mdp, mdpRewards, iterationMethod, target, inf, min, init, known, false, strat);
	}

	/**
	 * Compute expected reachability rewards using value iteration.
	 * Optionally, store optimal (memoryless) strategy info.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param inf States for which reward is infinite
	 * @param min Min or max rewards (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten)
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param topological Do topological value iteration?
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.
	 */
	protected ModelCheckerResult doValueIterationReachRewards(MDP mdp, MDPRewards mdpRewards, IterationMethod iterationMethod, BitSet target, BitSet inf, boolean min, double init[], BitSet known, boolean topological, int strat[])
			throws PrismException
	{
		BitSet unknown;
		int i, n;
		long timer;

		// Start value iteration
		timer = System.currentTimeMillis();
		String description = (min ? "min" : "max") + (topological ? ", topological" : "" ) + ", with " + iterationMethod.getDescriptionShort();
		mainLog.println("Starting value iteration (" + description + ")...");

		ExportIterations iterationsExport = null;
		if (settings.getBoolean(PrismSettings.PRISM_EXPORT_ITERATIONS)) {
			iterationsExport = new ExportIterations("Explicit MDP ReachRewards value iteration (" + description +")");
			mainLog.println("Exporting iterations to " + iterationsExport.getFileName());
		}

		// Store num states
		n = mdp.getNumStates();

		// Initialise solution vectors. Use (where available) the following in order of preference:
		// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) passed in initial value; (4) 0.0
		if (init != null) {
			if (known != null) {
				for (i = 0; i < n; i++)
					init[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
			} else {
				for (i = 0; i < n; i++)
					init[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : init[i];
			}
		} else {
			init = new double[n];
			for (i = 0; i < n; i++)
				init[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;
		}

		// Determine set of states actually need to compute values for
		unknown = new BitSet();
		unknown.set(0, n);
		unknown.andNot(target);
		unknown.andNot(inf);
		if (known != null)
			unknown.andNot(known);

		if (iterationsExport != null)
			iterationsExport.exportVector(init, 0);

		IterationMethod.IterationValIter forMvMultRewMinMax = iterationMethod.forMvMultRewMinMax(mdp, mdpRewards, min, strat);
		forMvMultRewMinMax.init(init);

		IntSet unknownStates = IntSet.asIntSet(unknown);

		if (topological) {
			// Compute SCCInfo, including trivial SCCs in the subgraph obtained when only considering
			// states in unknown
			SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);

			IterationMethod.SingletonSCCSolver singletonSCCSolver = (int s, double[] soln) -> {
				soln[s] = mdp.mvMultRewJacMinMaxSingle(s, soln, mdpRewards, min, strat);
			};

			// run the actual value iteration
			return iterationMethod.doTopologicalValueIteration(this, description, sccs, forMvMultRewMinMax, singletonSCCSolver, timer, iterationsExport);
		} else {
			// run the actual value iteration
			return iterationMethod.doValueIteration(this, description, forMvMultRewMinMax, unknownStates, timer, iterationsExport);
		}
	}

	/**
	 * Compute expected reachability rewards using Gauss-Seidel (including Jacobi-style updates).
	 * Optionally, store optimal (memoryless) strategy info. 
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param inf States for which reward is infinite
	 * @param min Min or max rewards (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten) 
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.
	 */
	protected ModelCheckerResult computeReachRewardsGaussSeidel(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, double init[],
			BitSet known, int strat[]) throws PrismException
	{
		IterationMethodGS iterationMethod = new IterationMethodGS(termCrit == TermCrit.ABSOLUTE, termCritParam, false);
		return doValueIterationReachRewards(mdp, mdpRewards, iterationMethod, target, inf, min, init, known, false, strat);
	}

	/**
	 * Compute expected reachability rewards using interval iteration
	 * Optionally, store optimal (memoryless) strategy info.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param inf States for which reward is infinite
	 * @param min Min or max rewards (true=min, false=max)
	 * @param init Optionally, an initial solution vector (will be overwritten)
	 * @param known Optionally, a set of states for which the exact answer is known
	 * @param topological do topological interval iteration
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 * Note: if 'known' is specified (i.e. is non-null, 'init' must also be given and is used for the exact values.
	 */
	protected ModelCheckerResult doIntervalIterationReachRewards(MDP mdp, MDPRewards mdpRewards, IterationMethod iterationMethod, BitSet target, BitSet inf, boolean min, double init[], BitSet known, boolean topological, int strat[])
			throws PrismException
	{
		BitSet unknown;
		int i, n;
		double initBelow[], initAbove[];
		long timer;

		// Store num states
		n = mdp.getNumStates();

		// Determine set of states actually need to compute values for
		unknown = new BitSet();
		unknown.set(0, n);
		unknown.andNot(target);
		unknown.andNot(inf);
		if (known != null)
			unknown.andNot(known);

		OptionsIntervalIteration iiOptions = OptionsIntervalIteration.from(this);

		double upperBound;
		if (iiOptions.hasManualUpperBound()) {
			upperBound = iiOptions.getManualUpperBound();
			getLog().printWarning("Upper bound for interval iteration manually set to " + upperBound);
		} else {
			if (min) {
				upperBound = computeReachRewardsMinUpperBound(mdp, mdpRewards, target, unknown, inf);
			} else {
				upperBound = computeReachRewardsMaxUpperBound(mdp, mdpRewards, target, unknown, inf);
			}
		}

		double lowerBound;
		if (iiOptions.hasManualLowerBound()) {
			lowerBound = iiOptions.getManualLowerBound();
			getLog().printWarning("Lower bound for interval iteration manually set to " + lowerBound);
		} else {
			lowerBound = 0.0;
		}

		if (min) {
			if (!isContracting(mdp, unknown, target)) {
				throw new PrismNotSupportedException("Interval iteration for Rmin and non-contracting MDP currently not supported");
			} else {
				mainLog.println("Relevant sub-MDP is contracting, proceed...");
			}
		}

		// Start value iteration
		timer = System.currentTimeMillis();
		String description = (min ? "min" : "max") + (topological ? ", topological" : "") + ", with " + iterationMethod.getDescriptionShort();
		mainLog.println("Starting interval iteration (" + description + ")...");

		ExportIterations iterationsExport = null;
		if (settings.getBoolean(PrismSettings.PRISM_EXPORT_ITERATIONS)) {
			iterationsExport = new ExportIterations("Explicit MDP ReachRewards interval iteration (" + description + ")");
			mainLog.println("Exporting iterations to " + iterationsExport.getFileName());
		}

		// Create initial solution vector(s)
		initBelow = (init == null) ? new double[n] : init;
		initAbove = new double[n];

		// Initialise solution vector from below. Use (where available) the following in order of preference:
		// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) lowerBound
		if (init != null && known != null) {
			for (i = 0; i < n; i++)
				initBelow[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : lowerBound;
		} else {
			for (i = 0; i < n; i++)
				initBelow[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : lowerBound;
		}

		// Initialise solution vector from above. Use (where available) the following in order of preference:
		// (1) exact answer, if already known; (2) 0.0/infinity if in target/inf; (3) upperBound
		if (init != null && known != null) {
			for (i = 0; i < n; i++)
				initAbove[i] = known.get(i) ? init[i] : target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : upperBound;
		} else {
			for (i = 0; i < n; i++)
				initAbove[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : upperBound;
		}

		if (iterationsExport != null) {
			iterationsExport.exportVector(initBelow, 0);
			iterationsExport.exportVector(initAbove, 1);
		}

		final boolean enforceMonotonicFromBelow = iiOptions.isEnforceMonotonicityFromBelow();
		final boolean enforceMonotonicFromAbove = iiOptions.isEnforceMonotonicityFromAbove();
		final boolean checkMonotonic = iiOptions.isCheckMonotonicity();

		if (!enforceMonotonicFromAbove) {
			getLog().println("Note: Interval iteration is configured to not enforce monotonicity from above.");
		}
		if (!enforceMonotonicFromBelow) {
			getLog().println("Note: Interval iteration is configured to not enforce monotonicity from below.");
		}

		IterationMethod.IterationIntervalIter below = iterationMethod.forMvMultRewMinMaxInterval(mdp, mdpRewards, min, strat, true, enforceMonotonicFromBelow, checkMonotonic);
		IterationMethod.IterationIntervalIter above = iterationMethod.forMvMultRewMinMaxInterval(mdp, mdpRewards, min, strat, false, enforceMonotonicFromAbove, checkMonotonic);
		below.init(initBelow);
		above.init(initAbove);

		IntSet unknownStates = IntSet.asIntSet(unknown);

		ModelCheckerResult rv;
		if (topological) {
			// Compute SCCInfo, including trivial SCCs in the subgraph obtained when only considering
			// states in unknown
			SCCInfo sccs = SCCComputer.computeTopologicalOrdering(this, mdp, true, unknown::get);

			IterationMethod.SingletonSCCSolver singletonSCCSolver = (int s, double[] soln) -> {
				soln[s] = mdp.mvMultRewJacMinMaxSingle(s, soln, mdpRewards, min, strat);
			};

			// run the actual value iteration
			rv = iterationMethod.doTopologicalIntervalIteration(this, description, sccs, below, above, singletonSCCSolver, timer, iterationsExport);
		} else {
			// run the actual value iteration
			rv = iterationMethod.doIntervalIteration(this, description, below, above, unknownStates, timer, iterationsExport);
		}

		double max_v = PrismUtils.findMaxFinite(rv.soln, unknownStates.iterator());
		if (max_v != Double.NEGATIVE_INFINITY) {
			mainLog.println("Maximum finite value in solution vector at end of interval iteration: " + max_v);
		}

		return rv;
	}

	/**
	 * Compute expected reachability rewards using policy iteration.
	 * The array {@code strat} is used both to pass in the initial strategy for policy iteration,
	 * and as storage for the resulting optimal strategy (if needed).
	 * Passing in an initial strategy is required when some states have infinite reward,
	 * to avoid the possibility of policy iteration getting stuck on an infinite-value strategy.
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param target Target states
	 * @param inf States for which reward is infinite
	 * @param min Min or max rewards (true=min, false=max)
	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
	 */
	protected ModelCheckerResult computeReachRewardsPolIter(MDP mdp, MDPRewards mdpRewards, BitSet target, BitSet inf, boolean min, int strat[])
			throws PrismException
	{
		ModelCheckerResult res;
		int i, n, iters, totalIters;
		double soln[], soln2[];
		boolean done;
		long timer;
		DTMCModelChecker mcDTMC;
		DTMC dtmc;
		MCRewards mcRewards;

		// Re-use solution to solve each new policy (strategy)?
		boolean reUseSoln = true;

		// Start policy iteration
		timer = System.currentTimeMillis();
		mainLog.println("Starting policy iteration (" + (min ? "min" : "max") + ")...");

		// Create a DTMC model checker (for solving policies)
		mcDTMC = new DTMCModelChecker(this);
		mcDTMC.inheritSettings(this);
		mcDTMC.setLog(new PrismDevNullLog());

		// Store num states
		n = mdp.getNumStates();

		// Create solution vector(s)
		soln = new double[n];
		soln2 = new double[n];

		// Initialise solution vectors.
		for (i = 0; i < n; i++)
			soln[i] = soln2[i] = target.get(i) ? 0.0 : inf.get(i) ? Double.POSITIVE_INFINITY : 0.0;

		// If not passed in, create new storage for strategy and initialise
		// Initial strategy just picks first choice (0) everywhere
		if (strat == null) {
			strat = new int[n];
			for (i = 0; i < n; i++)
				strat[i] = 0;
		}
			
		// Start iterations
		iters = totalIters = 0;
		done = false;
		while (!done && iters < maxIters) {
			iters++;
			// Solve induced DTMC for strategy
			dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
			mcRewards = new MCRewardsFromMDPRewards(mdpRewards, strat);
			res = mcDTMC.computeReachRewardsValIter(dtmc, mcRewards, target, inf, reUseSoln ? soln : null, null);
			soln = res.soln;
			totalIters += res.numIters;
			// Check if optimal, improve non-optimal choices
			mdp.mvMultRewMinMax(soln, mdpRewards, min, soln2, null, false, null);
			done = true;
			for (i = 0; i < n; i++) {
				// Don't look at target/inf states - we may not have strategy info for them,
				// so they might appear non-optimal
				if (target.get(i) || inf.get(i))
					continue;
				if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
					done = false;
					List<Integer> opt = mdp.mvMultRewMinMaxSingleChoices(i, soln, mdpRewards, min, soln2[i]);
					// Only update strategy if strictly better
					if (!opt.contains(strat[i]))
						strat[i] = opt.get(0);
				}
			}
		}

		// Finished policy iteration
		timer = System.currentTimeMillis() - timer;
		mainLog.print("Policy iteration");
		mainLog.println(" took " + iters + " cycles (" + totalIters + " iterations in total) and " + timer / 1000.0 + " seconds.");

		// Return results
		res = new ModelCheckerResult();
		res.soln = soln;
		res.numIters = totalIters;
		res.timeTaken = timer / 1000.0;
		return res;
	}

	/**
	 * Construct strategy information for min/max expected reachability.
	 * (More precisely, list of indices of choices resulting in min/max.)
	 * (Note: indices are guaranteed to be sorted in ascending order.)
	 * @param mdp The MDP
	 * @param mdpRewards The rewards
	 * @param state The state to generate strategy info for
	 * @param target The set of target states to reach
	 * @param min Min or max rewards (true=min, false=max)
	 * @param lastSoln Vector of values from which to recompute in one iteration 
	 */
	public List<Integer> expReachStrategy(MDP mdp, MDPRewards mdpRewards, int state, BitSet target, boolean min, double lastSoln[]) throws PrismException
	{
		double val = mdp.mvMultRewMinMaxSingle(state, lastSoln, mdpRewards, min, null);
		return mdp.mvMultRewMinMaxSingleChoices(state, lastSoln, mdpRewards, min, val);
	}

	/**
	 * Restrict a (memoryless) strategy for an MDP, stored as an integer array of choice indices,
	 * to the states of the MDP that are reachable under that strategy.  
	 * @param mdp The MDP
	 * @param strat The strategy
	 */
	public void restrictStrategyToReachableStates(MDP mdp, int strat[])
	{
		BitSet restrict = new BitSet();
		BitSet explore = new BitSet();
		// Get initial states
		for (int is : mdp.getInitialStates()) {
			restrict.set(is);
			explore.set(is);
		}
		// Compute reachable states (store in 'restrict') 
		boolean foundMore = true;
		while (foundMore) {
			foundMore = false;
			for (int s = explore.nextSetBit(0); s >= 0; s = explore.nextSetBit(s + 1)) {
				explore.set(s, false);
				if (strat[s] >= 0) {
					Iterator<Map.Entry<Integer, Double>> iter = mdp.getTransitionsIterator(s, strat[s]);
					while (iter.hasNext()) {
						Map.Entry<Integer, Double> e = iter.next();
						int dest = e.getKey();
						if (!restrict.get(dest)) {
							foundMore = true;
							restrict.set(dest);
							explore.set(dest);
						}
					}
				}
			}
		}
		// Set strategy choice for non-reachable state to -1
		int n = mdp.getNumStates();
		for (int s = restrict.nextClearBit(0); s < n; s = restrict.nextClearBit(s + 1)) {
			strat[s] = -3;
		}
	}

	/**
	 * Compute the end component quotient (for use with PMax),
	 * each maximal end component is collapsed to a single state,
	 * likewise the yes and no regions, respectively.
	 */
	private MDPEquiv maxQuotient(MDP mdp, BitSet yes, BitSet no) throws PrismException
	{
		BitSet maybe = new BitSet();
		maybe.set(0, mdp.getNumStates());
		maybe.andNot(yes);
		maybe.andNot(no);

		ECComputer ec = ECComputer.createECComputer(this, mdp);

		ec.computeMECStates(maybe);
		List<BitSet> mecs = ec.getMECStates();
		mecs.add(yes);
		mecs.add(no);

		EquivalenceRelationInteger eq = new EquivalenceRelationInteger(mecs);
		BasicModelTransformation<MDP, MDPEquiv> quotientTransform = MDPEquiv.transformDroppingLoops(mdp, eq);
		MDPEquiv quotient = quotientTransform.getTransformedModel();

		//mdp.exportToDotFile("original.dot");
		//quotient.exportToDotFile("maxQuotient.dot");

		int realStates = quotient.getNumStates() - quotient.getNonRepresentativeStates().cardinality();
		mainLog.println("Max-Quotient MDP: " + realStates + " equivalence classes / non-trap states.");

		return quotient;
	}

	/**
	 * Simple test program.
	 */
	public static void main(String args[])
	{
		MDPModelChecker mc;
		MDPSimple mdp;
		ModelCheckerResult res;
		BitSet init, target;
		Map<String, BitSet> labels;
		boolean min = true;
		try {
			mc = new MDPModelChecker(null);
			mdp = new MDPSimple();
			mdp.buildFromPrismExplicit(args[0]);
			mdp.addInitialState(0);
			//System.out.println(mdp);
			labels = StateModelChecker.loadLabelsFile(args[1]);
			//System.out.println(labels);
			init = labels.get("init");
			target = labels.get(args[2]);
			if (target == null)
				throw new PrismException("Unknown label \"" + args[2] + "\"");
			for (int i = 3; i < args.length; i++) {
				if (args[i].equals("-min"))
					min = true;
				else if (args[i].equals("-max"))
					min = false;
				else if (args[i].equals("-nopre"))
					mc.setPrecomp(false);
			}
			res = mc.computeReachProbs(mdp, target, min);
			System.out.println(res.soln[init.nextSetBit(0)]);
		} catch (PrismException e) {
			System.out.println(e);
		}
	}

}