POMDP computation performed for any (single) state, not just the initial one.

5 years ago · e594f2309b
3 changed files with 40 additions and 27 deletions
--- a/prism-tests/functionality/verify/pomdps/mdp_simple.prism.props
+++ b/prism-tests/functionality/verify/pomdps/mdp_simple.prism.props
@ -4,11 +4,17 @@ Pmax=? [ F t=1 ];
 // RESULT: 0.1
 Pmin=? [ F t=1 ];

+// RESULT: 0.3
+filter(state, Pmin=? [ F t=1 ], s=2&t=0);
+
 // RESULT: 0.3
 Pmax=? [ s<=2 U t=1 ];

 // RESULT: 3.0
 Rmax=? [ F t>0 ];

+// RESULT: 2.0
+filter(state, Rmax=? [ F t>0 ], s=2&t=0);
+
 // RESULT: 1.0
 Rmin=? [ F t>0 ];
--- a/prism/src/explicit/POMDPModelChecker.java
+++ b/prism/src/explicit/POMDPModelChecker.java
@ -111,15 +111,18 @@ public class POMDPModelChecker extends ProbModelChecker
 	 * @param target Target states
 	 * @param min Min or max probabilities (true=min, false=max)
 	 */
-	public ModelCheckerResult computeReachProbs(POMDP pomdp, BitSet remain, BitSet target, boolean min) throws PrismException
+	public ModelCheckerResult computeReachProbs(POMDP pomdp, BitSet remain, BitSet target, boolean min, BitSet statesOfInterest) throws PrismException
 	{
 		ModelCheckerResult res = null;
 		long timer;
 		String stratFilename = null;

-		// Check for multiple initial states 
-		if (pomdp.getNumInitialStates() > 1) {
-			throw new PrismNotSupportedException("POMDP model checking does not yet support multiple initial states");
+		// Check we are only computing for a single state (and use initial state if unspecified)
+		if (statesOfInterest == null) {
+			statesOfInterest = new BitSet();
+			statesOfInterest.set(pomdp.getFirstInitialState());
+		} else if (statesOfInterest.cardinality() > 1) {
+			throw new PrismNotSupportedException("POMDPs can only be solved from a single start state");
 		}
 		
 		// Start probabilistic reachability
@ -132,7 +135,7 @@ public class POMDPModelChecker extends ProbModelChecker
 		}

 		// Compute rewards
-		res = computeReachProbsFixedGrid(pomdp, remain, target, min, stratFilename);
+		res = computeReachProbsFixedGrid(pomdp, remain, target, min, statesOfInterest.nextSetBit(0), stratFilename);

 		// Finished probabilistic reachability
 		timer = System.currentTimeMillis() - timer;
@ -148,14 +151,16 @@ public class POMDPModelChecker extends ProbModelChecker
 	 * i.e. compute the min/max probability of reaching a state in {@code target},
 	 * while remaining in those in @{code remain},
 	 * using Lovejoy's fixed-resolution grid approach.
+	 * This only computes the probabiity from a single start state
 	 * Optionally, store optimal (memoryless) strategy info. 
 	 * @param pomdp The POMDP
 	 * @param remain Remain in these states (optional: null means "all")
 	 * @param target Target states
 	 * @param min Min or max rewards (true=min, false=max)
+	 * @param sInit State to compute for
 	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 	 */
-	protected ModelCheckerResult computeReachProbsFixedGrid(POMDP pomdp, BitSet remain, BitSet target, boolean min, String stratFilename) throws PrismException
+	protected ModelCheckerResult computeReachProbsFixedGrid(POMDP pomdp, BitSet remain, BitSet target, boolean min, int sInit, String stratFilename) throws PrismException
 	{
 		// Start fixed-resolution grid approximation
 		long timer = System.currentTimeMillis();
@ -231,7 +236,7 @@ public class POMDPModelChecker extends ProbModelChecker
 		
 		// Extract (approximate) solution value for the initial belief
 		// Also get (approximate) accuracy of result from value iteration
-		Belief initialBelief = pomdp.getInitialBelief();
+		Belief initialBelief = Belief.pointDistribution(sInit, pomdp);
 		double outerBound = values.apply(initialBelief);
 		double outerBoundMaxDiff = PrismUtils.measureSupNorm(vhash, vhash_backUp, termCrit == TermCrit.RELATIVE);
 		Accuracy outerBoundAcc = AccuracyFactory.valueIteration(termCritParam, outerBoundMaxDiff, termCrit == TermCrit.RELATIVE);
@ -240,7 +245,7 @@ public class POMDPModelChecker extends ProbModelChecker
 		
 		// Build DTMC to get inner bound (and strategy)
 		mainLog.println("\nBuilding strategy-induced model...");
-		POMDPStrategyModel psm = buildStrategyModel(pomdp, null, targetObs, unknownObs, backup);
+		POMDPStrategyModel psm = buildStrategyModel(pomdp, sInit, null, targetObs, unknownObs, backup);
 		MDP mdp = psm.mdp;
 		mainLog.print("Strategy-induced model: " + mdp.infoString());
 		// Export?
@ -289,9 +294,7 @@ public class POMDPModelChecker extends ProbModelChecker
 		Accuracy resultAcc = resultValAndAcc.second;
 		mainLog.println("Result bounds: [" + resultAcc.getResultLowerBound(resultVal) + "," + resultAcc.getResultUpperBound(resultVal) + "]");
 		double soln[] = new double[pomdp.getNumStates()];
-		for (int initialState : pomdp.getInitialStates()) {
-			soln[initialState] = resultVal;
-		}
+		soln[sInit] = resultVal;

 		// Return results
 		ModelCheckerResult res = new ModelCheckerResult();
@ -310,15 +313,18 @@ public class POMDPModelChecker extends ProbModelChecker
 	 * @param target Target states
 	 * @param min Min or max rewards (true=min, false=max)
 	 */
-	public ModelCheckerResult computeReachRewards(POMDP pomdp, MDPRewards mdpRewards, BitSet target, boolean min) throws PrismException
+	public ModelCheckerResult computeReachRewards(POMDP pomdp, MDPRewards mdpRewards, BitSet target, boolean min, BitSet statesOfInterest) throws PrismException
 	{
 		ModelCheckerResult res = null;
 		long timer;
 		String stratFilename = null;
 		
-		// Check for multiple initial states 
-		if (pomdp.getNumInitialStates() > 1) {
-			throw new PrismNotSupportedException("POMDP model checking does not yet support multiple initial states");
+		// Check we are only computing for a single state (and use initial state if unspecified)
+		if (statesOfInterest == null) {
+			statesOfInterest = new BitSet();
+			statesOfInterest.set(pomdp.getFirstInitialState());
+		} else if (statesOfInterest.cardinality() > 1) {
+			throw new PrismNotSupportedException("POMDPs can only be solved from a single start state");
 		}
 		
 		// Start expected reachability
@ -331,7 +337,7 @@ public class POMDPModelChecker extends ProbModelChecker
 		}

 		// Compute rewards
-		res = computeReachRewardsFixedGrid(pomdp, mdpRewards, target, min, stratFilename);
+		res = computeReachRewardsFixedGrid(pomdp, mdpRewards, target, min, statesOfInterest.nextSetBit(0), stratFilename);

 		// Finished expected reachability
 		timer = System.currentTimeMillis() - timer;
@ -344,15 +350,17 @@ public class POMDPModelChecker extends ProbModelChecker

 	/**
 	 * Compute expected reachability rewards using Lovejoy's fixed-resolution grid approach.
+	 * This only computes the expected reward from a single start state
 	 * Optionally, store optimal (memoryless) strategy info. 
 	 * @param pomdp The POMMDP
 	 * @param mdpRewards The rewards
 	 * @param target Target states
 	 * @param inf States for which reward is infinite
 	 * @param min Min or max rewards (true=min, false=max)
+	 * @param sInit State to compute for
 	 * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 	 */
-	protected ModelCheckerResult computeReachRewardsFixedGrid(POMDP pomdp, MDPRewards mdpRewards, BitSet target, boolean min, String stratFilename) throws PrismException
+	protected ModelCheckerResult computeReachRewardsFixedGrid(POMDP pomdp, MDPRewards mdpRewards, BitSet target, boolean min, int sInit, String stratFilename) throws PrismException
 	{
 		// Start fixed-resolution grid approximation
 		long timer = System.currentTimeMillis();
@ -421,7 +429,7 @@ public class POMDPModelChecker extends ProbModelChecker

 		// Extract (approximate) solution value for the initial belief
 		// Also get (approximate) accuracy of result from value iteration
-		Belief initialBelief = pomdp.getInitialBelief();
+		Belief initialBelief = Belief.pointDistribution(sInit, pomdp);
 		double outerBound = values.apply(initialBelief);
 		double outerBoundMaxDiff = PrismUtils.measureSupNorm(vhash, vhash_backUp, termCrit == TermCrit.RELATIVE);
 		Accuracy outerBoundAcc = AccuracyFactory.valueIteration(termCritParam, outerBoundMaxDiff, termCrit == TermCrit.RELATIVE);
@ -430,7 +438,7 @@ public class POMDPModelChecker extends ProbModelChecker
 		
 		// Build DTMC to get inner bound (and strategy)
 		mainLog.println("\nBuilding strategy-induced model...");
-		POMDPStrategyModel psm = buildStrategyModel(pomdp, mdpRewards, targetObs, unknownObs, backup);
+		POMDPStrategyModel psm = buildStrategyModel(pomdp, sInit, mdpRewards, targetObs, unknownObs, backup);
 		MDP mdp = psm.mdp;
 		MDPRewards mdpRewardsNew = psm.mdpRewards;
 		mainLog.print("Strategy-induced model: " + mdp.infoString());
@ -480,9 +488,7 @@ public class POMDPModelChecker extends ProbModelChecker
 		Accuracy resultAcc = resultValAndAcc.second;
 		mainLog.println("Result bounds: [" + resultAcc.getResultLowerBound(resultVal) + "," + resultAcc.getResultUpperBound(resultVal) + "]");
 		double soln[] = new double[pomdp.getNumStates()];
-		for (int initialState : pomdp.getInitialStates()) {
-			soln[initialState] = resultVal;
-		}
+		soln[sInit] = resultVal;

 		// Return results
 		ModelCheckerResult res = new ModelCheckerResult();
@ -704,6 +710,7 @@ public class POMDPModelChecker extends ProbModelChecker
 	 * Build a (Markov chain) model representing the fragment of the belief MDP induced by an optimal strategy.
 	 * The model is stored as an MDP to allow easier attachment of optional actions.
 	 * @param pomdp
+	 * @param sInit
 	 * @param mdpRewards
 	 * @param vhash
 	 * @param vhash_backUp
@ -711,7 +718,7 @@ public class POMDPModelChecker extends ProbModelChecker
 	 * @param min
 	 * @param listBeliefs
 	 */
-	protected POMDPStrategyModel buildStrategyModel(POMDP pomdp, MDPRewards mdpRewards, BitSet targetObs, BitSet unknownObs, BeliefMDPBackUp backup)
+	protected POMDPStrategyModel buildStrategyModel(POMDP pomdp, int sInit, MDPRewards mdpRewards, BitSet targetObs, BitSet unknownObs, BeliefMDPBackUp backup)
 	{
 		// Initialise model/state/rewards storage
 		MDPSimple mdp = new MDPSimple();
@ -720,7 +727,7 @@ public class POMDPModelChecker extends ProbModelChecker
 		BitSet mdpTarget = new BitSet();
 		StateRewardsSimple stateRewards = new StateRewardsSimple();
 		// Add initial state
-		Belief initialBelief = pomdp.getInitialBelief();
+		Belief initialBelief = Belief.pointDistribution(sInit, pomdp);
 		exploredBeliefs.add(initialBelief);
 		toBeExploredBeliefs.offer(initialBelief);
 		mdp.addState();
@ -996,7 +1003,7 @@ public class POMDPModelChecker extends ProbModelChecker
 					mc.setPrecomp(false);
 			}
 			pomdp = new POMDPSimple(mdp);
-			res = mc.computeReachRewards(pomdp, null, target, min);
+			res = mc.computeReachRewards(pomdp, null, target, min, null);
 			System.out.println(res.soln[init.nextSetBit(0)]);
 		} catch (PrismException e) {
 			System.out.println(e);
--- a/prism/src/explicit/ProbModelChecker.java
+++ b/prism/src/explicit/ProbModelChecker.java
@ -852,7 +852,7 @@ public class ProbModelChecker extends NonProbModelChecker
 			res = ((MDPModelChecker) this).computeUntilProbs((MDP) model, remain, target, minMax.isMin());
 			break;
 		case POMDP:
-			res = ((POMDPModelChecker) this).computeReachProbs((POMDP) model, remain, target, minMax.isMin());
+			res = ((POMDPModelChecker) this).computeReachProbs((POMDP) model, remain, target, minMax.isMin(), statesOfInterest);
 			break;
 		case STPG:
 			res = ((STPGModelChecker) this).computeUntilProbs((STPG) model, remain, target, minMax.isMin1(), minMax.isMin2());
@ -1155,7 +1155,7 @@ public class ProbModelChecker extends NonProbModelChecker
 			res = ((MDPModelChecker) this).computeReachRewards((MDP) model, (MDPRewards) modelRewards, target, minMax.isMin());
 			break;
 		case POMDP:
-			res = ((POMDPModelChecker) this).computeReachRewards((POMDP) model, (MDPRewards) modelRewards, target, minMax.isMin());
+			res = ((POMDPModelChecker) this).computeReachRewards((POMDP) model, (MDPRewards) modelRewards, target, minMax.isMin(), statesOfInterest);
 			break;
 		case STPG:
 			res = ((STPGModelChecker) this).computeReachRewards((STPG) model, (STPGRewards) modelRewards, target, minMax.isMin1(), minMax.isMin2());