Browse Source

Enable simulation of POMDPs via SimulatorEngine.

accumulation-v4.7
Dave Parker 5 years ago
parent
commit
d78b627bf4
  1. 6
      prism/src/cex/CexPathAsBDDs.java
  2. 6
      prism/src/cex/CexPathStates.java
  3. 15
      prism/src/simulator/Path.java
  4. 27
      prism/src/simulator/PathFull.java
  5. 6
      prism/src/simulator/PathFullInfo.java
  6. 12
      prism/src/simulator/PathFullPrefix.java
  7. 25
      prism/src/simulator/PathOnTheFly.java
  8. 23
      prism/src/simulator/SimulatorEngine.java

6
prism/src/cex/CexPathAsBDDs.java

@@ -85,6 +85,12 @@ public class CexPathAsBDDs implements PathFullInfo
return model.convertBddToState(states.get(step));
}
@Override
public State getObservation(int step)
{
return null;
}
@Override
public double getStateReward(int step, int rsi)
{

6
prism/src/cex/CexPathStates.java

@@ -79,6 +79,12 @@ public class CexPathStates implements PathFullInfo
return states.get(step);
}
@Override
public State getObservation(int step)
{
return null;
}
@Override
public double getStateReward(int step, int rsi)
{

15
prism/src/simulator/Path.java

@@ -37,22 +37,22 @@ public abstract class Path
// MUTATORS
/**
* Initialise the path with an initial state and rewards.
* Note: State object and array will be copied, not stored directly.
* Initialise the path with an initial state, observation and rewards.
* Note: State objects and array will be copied, not stored directly.
*/
public abstract void initialise(State initialState, double[] initialStateRewards);
public abstract void initialise(State initialState, State initialObs, double[] initialStateRewards);
/**
* Add a step to the path.
* Note: State object and arrays will be copied, not stored directly.
*/
public abstract void addStep(int choice, Object action, String actionString, double probability, double[] transRewards, State newState, double[] newStateRewards, ModelGenerator modelGen);
public abstract void addStep(int choice, Object action, String actionString, double probability, double[] transRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen);
/**
* Add a timed step to the path.
* Note: State object and arrays will be copied, not stored directly.
*/
public abstract void addStep(double time, int choice, Object action, String actionString, double probability, double[] transRewards, State newState, double[] newStateRewards, ModelGenerator modelGen);
public abstract void addStep(double time, int choice, Object action, String actionString, double probability, double[] transRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen);
// ACCESSORS
@@ -76,6 +76,11 @@ public abstract class Path
*/
public abstract State getCurrentState();
/**
* Get the observation for the current state, i.e. for the current final state of the path.
*/
public abstract State getCurrentObservation();
/**
* Get the action taken in the previous step.
*/

27
prism/src/simulator/PathFull.java

@@ -89,7 +89,7 @@ public class PathFull extends Path implements PathFullInfo
// MUTATORS (for Path)
@Override
public void initialise(State initialState, double[] initialStateRewards)
public void initialise(State initialState, State initialObs, double[] initialStateRewards)
{
clear();
// Add new step item to the path
@@ -97,6 +97,7 @@ public class PathFull extends Path implements PathFullInfo
steps.add(step);
// Add (copies of) initial state and state rewards to new step
step.state = new State(initialState);
step.obs = initialObs == null ? null : new State(initialObs);
step.stateRewards = initialStateRewards.clone();
// Set cumulative time/reward (up until entering this state)
step.timeCumul = 0.0;
@@ -108,15 +109,15 @@ public class PathFull extends Path implements PathFullInfo
}
@Override
public void addStep(int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, double[] newStateRewards,
public void addStep(int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, State newObs, double[] newStateRewards,
ModelGenerator modelGen)
{
addStep(1.0, choice, action, actionString, probability, transitionRewards, newState, newStateRewards, modelGen);
addStep(1.0, choice, action, actionString, probability, transitionRewards, newState, newObs, newStateRewards, modelGen);
}
@Override
public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState,
double[] newStateRewards, ModelGenerator modelGen)
State newObs, double[] newStateRewards, ModelGenerator modelGen)
{
Step stepOld, stepNew;
// Add info to last existing step
@@ -130,8 +131,9 @@ public class PathFull extends Path implements PathFullInfo
// Add new step item to the path
stepNew = new Step();
steps.add(stepNew);
// Add (copies of) new state and state rewards to new step
// Add (copies of) new state, observation and state rewards to new step
stepNew.state = new State(newState);
stepNew.obs = newObs == null ? null : new State(newObs);
stepNew.stateRewards = newStateRewards.clone();
// Set cumulative time/rewards (up until entering this state)
stepNew.timeCumul = stepOld.timeCumul + time;
@@ -241,6 +243,12 @@ public class PathFull extends Path implements PathFullInfo
return steps.get(steps.size() - 1).state;
}
@Override
public State getCurrentObservation()
{
return steps.get(steps.size() - 1).obs;
}
@Override
public Object getPreviousAction()
{
@@ -339,6 +347,12 @@ public class PathFull extends Path implements PathFullInfo
return steps.get(step).state;
}
@Override
public State getObservation(int step)
{
return steps.get(step).obs;
}
@Override
public double getStateReward(int step, int rsi)
{
@@ -545,6 +559,7 @@ public class PathFull extends Path implements PathFullInfo
{
// Set (unknown) defaults and initialise arrays
state = null;
obs = null;
stateRewards = new double[numRewardStructs];
timeCumul = 0.0;
rewardsCumul = new double[numRewardStructs];
@@ -558,6 +573,8 @@ public class PathFull extends Path implements PathFullInfo
// Current state (before transition)
public State state;
// Observation for current state
public State obs;
// State rewards for current state
public double stateRewards[];
// Cumulative time spent up until entering this state

6
prism/src/simulator/PathFullInfo.java

@@ -44,6 +44,12 @@ public interface PathFullInfo
*/
public abstract State getState(int step);
/**
* Get the observation at a given step of the path.
* @param step Step index (0 = initial state/step of path)
*/
public abstract State getObservation(int step);
/**
* Get a state reward for the state at a given step of the path.
* If no reward info is stored ({@link #hasRewardInfo()} is false), returns 0.0.

12
prism/src/simulator/PathFullPrefix.java

@@ -52,19 +52,19 @@ public class PathFullPrefix extends Path
// MUTATORS (for Path)
@Override
public void initialise(State initialState, double[] initialStateRewards)
public void initialise(State initialState, State initialObs, double[] initialStateRewards)
{
// Do nothing (we are not allowed to modify the underlying PathFull)
}
@Override
public void addStep(int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, double[] newStateRewards, ModelGenerator modelGen)
public void addStep(int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen)
{
// Do nothing (we are not allowed to modify the underlying PathFull)
}
@Override
public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, double[] newStateRewards, ModelGenerator modelGen)
public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen)
{
// Do nothing (we are not allowed to modify the underlying PathFull)
}
@@ -102,6 +102,12 @@ public class PathFullPrefix extends Path
return pathFull.getState(prefixLength);
}
@Override
public State getCurrentObservation()
{
return pathFull.getObservation(prefixLength);
}
@Override
public Object getPreviousAction()
{

25
prism/src/simulator/PathOnTheFly.java

@@ -48,6 +48,7 @@ public class PathOnTheFly extends Path
protected long size;
protected State previousState;
protected State currentState;
protected State currentObs;
protected Object previousAction;
protected String previousActionString;
protected double previousProbability;
@@ -73,6 +74,10 @@ public class PathOnTheFly extends Path
// Create State objects for current/previous state
previousState = new State(modelInfo.getNumVars());
currentState = new State(modelInfo.getNumVars());
currentObs = null;
if (modelInfo.getModelType().partiallyObservable()) {
currentObs = new State(modelInfo.getNumObservables());
}
// Create arrays to store totals
totalRewards = new double[numRewardStructs];
previousStateRewards = new double[numRewardStructs];
@@ -106,10 +111,13 @@ public class PathOnTheFly extends Path
// MUTATORS (for Path)
@Override
public void initialise(State initialState, double[] initialStateRewards)
public void initialise(State initialState, State initialObs, double[] initialStateRewards)
{
clear();
currentState.copy(initialState);
if (initialObs != null) {
currentObs.copy(initialObs);
}
for (int i = 0; i < numRewardStructs; i++) {
currentStateRewards[i] = initialStateRewards[i];
}
@@ -118,17 +126,20 @@ public class PathOnTheFly extends Path
}
@Override
public void addStep(int choice, Object action, String actionString, double probability, double[] transRewards, State newState, double[] newStateRewards, ModelGenerator modelGen)
public void addStep(int choice, Object action, String actionString, double probability, double[] transRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen)
{
addStep(1.0, choice, action, actionString, probability, transRewards, newState, newStateRewards, modelGen);
addStep(1.0, choice, action, actionString, probability, transRewards, newState, newObs, newStateRewards, modelGen);
}
@Override
public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transRewards, State newState, double[] newStateRewards, ModelGenerator modelGen)
public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen)
{
size++;
previousState.copy(currentState);
currentState.copy(newState);
if (newObs != null) {
currentObs.copy(newObs);
}
previousAction = action;
previousActionString = actionString;
previousProbability = probability;
@@ -174,6 +185,12 @@ public class PathOnTheFly extends Path
return currentState;
}
@Override
public State getCurrentObservation()
{
return currentObs;
}
@Override
public Object getPreviousAction()
{

23
prism/src/simulator/SimulatorEngine.java

@@ -303,10 +303,12 @@ public class SimulatorEngine extends PrismComponent
throw new PrismNotSupportedException("Random choice of multiple initial states not yet supported");
}
}
// Get initial observation
State currentObs = modelGen.getObservation(currentState);
// Get initial state reward
calculateStateRewards(currentState, tmpStateRewards);
// Initialise stored path
path.initialise(currentState, tmpStateRewards);
path.initialise(currentState, currentObs, tmpStateRewards);
// Explore initial state in model generator
computeTransitionsForState(currentState);
// Reset and then update samplers for any loaded properties
@@ -373,6 +375,7 @@ public class SimulatorEngine extends PrismComponent
executeTransition(ref.i, ref.offset, -1);
break;
case MDP:
case POMDP:
// Pick a random choice
i = rng.randomUnifInt(modelGen.getNumChoices());
// Pick a random transition from this choice
@@ -858,10 +861,12 @@ public class SimulatorEngine extends PrismComponent
calculateTransitionRewards(path.getCurrentState(), action, tmpTransitionRewards);
// Compute next state
currentState.copy(modelGen.computeTransitionTarget(i, offset));
// Compute observation for new state
State currentObs = modelGen.getObservation(currentState);
// Compute state rewards for new state
calculateStateRewards(currentState, tmpStateRewards);
// Update path
path.addStep(index, action, actionString, p, tmpTransitionRewards, currentState, tmpStateRewards, modelGen);
path.addStep(index, action, actionString, p, tmpTransitionRewards, currentState, currentObs, tmpStateRewards, modelGen);
// Explore new state in model generator
computeTransitionsForState(currentState);
// Update samplers for any loaded properties
@@ -896,10 +901,12 @@ public class SimulatorEngine extends PrismComponent
calculateTransitionRewards(path.getCurrentState(), action, tmpTransitionRewards);
// Compute next state
currentState.copy(modelGen.computeTransitionTarget(i, offset));
// Compute observation for new state
State currentObs = modelGen.getObservation(currentState);
// Compute state rewards for new state
calculateStateRewards(currentState, tmpStateRewards);
// Update path
path.addStep(time, index, action, actionString, p, tmpTransitionRewards, currentState, tmpStateRewards, modelGen);
path.addStep(time, index, action, actionString, p, tmpTransitionRewards, currentState, currentObs, tmpStateRewards, modelGen);
// Explore new state in model generator
computeTransitionsForState(currentState);
// Update samplers for any loaded properties
@@ -1278,6 +1285,16 @@ public class SimulatorEngine extends PrismComponent
return ((PathFull) path).getState(step);
}
/**
* Get the observation at a given step of the path.
* (Not applicable for on-the-fly paths)
* @param step Step index (0 = initial state/step of path)
*/
public State getObservationOfPathStep(int step)
{
return ((PathFull) path).getObservation(step);
}
/**
* Get a state reward for the state at a given step of the path.
* (Not applicable for on-the-fly paths)

Loading…
Cancel
Save