diff --git a/prism/src/cex/CexPathAsBDDs.java b/prism/src/cex/CexPathAsBDDs.java index c6ee76b8..803e77be 100644 --- a/prism/src/cex/CexPathAsBDDs.java +++ b/prism/src/cex/CexPathAsBDDs.java @@ -85,6 +85,12 @@ public class CexPathAsBDDs implements PathFullInfo return model.convertBddToState(states.get(step)); } + @Override + public State getObservation(int step) + { + return null; + } + @Override public double getStateReward(int step, int rsi) { diff --git a/prism/src/cex/CexPathStates.java b/prism/src/cex/CexPathStates.java index b08b4763..a632c420 100644 --- a/prism/src/cex/CexPathStates.java +++ b/prism/src/cex/CexPathStates.java @@ -79,6 +79,12 @@ public class CexPathStates implements PathFullInfo return states.get(step); } + @Override + public State getObservation(int step) + { + return null; + } + @Override public double getStateReward(int step, int rsi) { diff --git a/prism/src/simulator/Path.java b/prism/src/simulator/Path.java index 5925319a..fa6fe313 100644 --- a/prism/src/simulator/Path.java +++ b/prism/src/simulator/Path.java @@ -37,22 +37,22 @@ public abstract class Path // MUTATORS /** - * Initialise the path with an initial state and rewards. - * Note: State object and array will be copied, not stored directly. + * Initialise the path with an initial state, observation and rewards. + * Note: State objects and array will be copied, not stored directly. */ - public abstract void initialise(State initialState, double[] initialStateRewards); + public abstract void initialise(State initialState, State initialObs, double[] initialStateRewards); /** * Add a step to the path. * Note: State object and arrays will be copied, not stored directly. */ - public abstract void addStep(int choice, Object action, String actionString, double probability, double[] transRewards, State newState, double[] newStateRewards, ModelGenerator modelGen); + public abstract void addStep(int choice, Object action, String actionString, double probability, double[] transRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen); /** * Add a timed step to the path. * Note: State object and arrays will be copied, not stored directly. */ - public abstract void addStep(double time, int choice, Object action, String actionString, double probability, double[] transRewards, State newState, double[] newStateRewards, ModelGenerator modelGen); + public abstract void addStep(double time, int choice, Object action, String actionString, double probability, double[] transRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen); // ACCESSORS @@ -76,6 +76,11 @@ public abstract class Path */ public abstract State getCurrentState(); + /** + * Get the observation for the current state, i.e. for the current final state of the path. + */ + public abstract State getCurrentObservation(); + /** * Get the action taken in the previous step. */ diff --git a/prism/src/simulator/PathFull.java b/prism/src/simulator/PathFull.java index abcd071d..f45bc882 100644 --- a/prism/src/simulator/PathFull.java +++ b/prism/src/simulator/PathFull.java @@ -89,7 +89,7 @@ public class PathFull extends Path implements PathFullInfo // MUTATORS (for Path) @Override - public void initialise(State initialState, double[] initialStateRewards) + public void initialise(State initialState, State initialObs, double[] initialStateRewards) { clear(); // Add new step item to the path @@ -97,6 +97,7 @@ public class PathFull extends Path implements PathFullInfo steps.add(step); // Add (copies of) initial state and state rewards to new step step.state = new State(initialState); + step.obs = initialObs == null ? null : new State(initialObs); step.stateRewards = initialStateRewards.clone(); // Set cumulative time/reward (up until entering this state) step.timeCumul = 0.0; @@ -108,15 +109,15 @@ public class PathFull extends Path implements PathFullInfo } @Override - public void addStep(int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, double[] newStateRewards, + public void addStep(int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen) { - addStep(1.0, choice, action, actionString, probability, transitionRewards, newState, newStateRewards, modelGen); + addStep(1.0, choice, action, actionString, probability, transitionRewards, newState, newObs, newStateRewards, modelGen); } @Override public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, - double[] newStateRewards, ModelGenerator modelGen) + State newObs, double[] newStateRewards, ModelGenerator modelGen) { Step stepOld, stepNew; // Add info to last existing step @@ -130,8 +131,9 @@ public class PathFull extends Path implements PathFullInfo // Add new step item to the path stepNew = new Step(); steps.add(stepNew); - // Add (copies of) new state and state rewards to new step + // Add (copies of) new state, observation and state rewards to new step stepNew.state = new State(newState); + stepNew.obs = newObs == null ? null : new State(newObs); stepNew.stateRewards = newStateRewards.clone(); // Set cumulative time/rewards (up until entering this state) stepNew.timeCumul = stepOld.timeCumul + time; @@ -241,6 +243,12 @@ public class PathFull extends Path implements PathFullInfo return steps.get(steps.size() - 1).state; } + @Override + public State getCurrentObservation() + { + return steps.get(steps.size() - 1).obs; + } + @Override public Object getPreviousAction() { @@ -339,6 +347,12 @@ public class PathFull extends Path implements PathFullInfo return steps.get(step).state; } + @Override + public State getObservation(int step) + { + return steps.get(step).obs; + } + @Override public double getStateReward(int step, int rsi) { @@ -545,6 +559,7 @@ public class PathFull extends Path implements PathFullInfo { // Set (unknown) defaults and initialise arrays state = null; + obs = null; stateRewards = new double[numRewardStructs]; timeCumul = 0.0; rewardsCumul = new double[numRewardStructs]; @@ -558,6 +573,8 @@ public class PathFull extends Path implements PathFullInfo // Current state (before transition) public State state; + // Observation for current state + public State obs; // State rewards for current state public double stateRewards[]; // Cumulative time spent up until entering this state diff --git a/prism/src/simulator/PathFullInfo.java b/prism/src/simulator/PathFullInfo.java index fdde26d7..a3adc4dd 100644 --- a/prism/src/simulator/PathFullInfo.java +++ b/prism/src/simulator/PathFullInfo.java @@ -44,6 +44,12 @@ public interface PathFullInfo */ public abstract State getState(int step); + /** + * Get the observation at a given step of the path. + * @param step Step index (0 = initial state/step of path) + */ + public abstract State getObservation(int step); + /** * Get a state reward for the state at a given step of the path. * If no reward info is stored ({@link #hasRewardInfo()} is false), returns 0.0. diff --git a/prism/src/simulator/PathFullPrefix.java b/prism/src/simulator/PathFullPrefix.java index 2852616a..49abc188 100644 --- a/prism/src/simulator/PathFullPrefix.java +++ b/prism/src/simulator/PathFullPrefix.java @@ -52,19 +52,19 @@ public class PathFullPrefix extends Path // MUTATORS (for Path) @Override - public void initialise(State initialState, double[] initialStateRewards) + public void initialise(State initialState, State initialObs, double[] initialStateRewards) { // Do nothing (we are not allowed to modify the underlying PathFull) } @Override - public void addStep(int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, double[] newStateRewards, ModelGenerator modelGen) + public void addStep(int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen) { // Do nothing (we are not allowed to modify the underlying PathFull) } @Override - public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, double[] newStateRewards, ModelGenerator modelGen) + public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transitionRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen) { // Do nothing (we are not allowed to modify the underlying PathFull) } @@ -102,6 +102,12 @@ public class PathFullPrefix extends Path return pathFull.getState(prefixLength); } + @Override + public State getCurrentObservation() + { + return pathFull.getObservation(prefixLength); + } + @Override public Object getPreviousAction() { diff --git a/prism/src/simulator/PathOnTheFly.java b/prism/src/simulator/PathOnTheFly.java index e075adf0..404c6e60 100644 --- a/prism/src/simulator/PathOnTheFly.java +++ b/prism/src/simulator/PathOnTheFly.java @@ -48,6 +48,7 @@ public class PathOnTheFly extends Path protected long size; protected State previousState; protected State currentState; + protected State currentObs; protected Object previousAction; protected String previousActionString; protected double previousProbability; @@ -73,6 +74,10 @@ public class PathOnTheFly extends Path // Create State objects for current/previous state previousState = new State(modelInfo.getNumVars()); currentState = new State(modelInfo.getNumVars()); + currentObs = null; + if (modelInfo.getModelType().partiallyObservable()) { + currentObs = new State(modelInfo.getNumObservables()); + } // Create arrays to store totals totalRewards = new double[numRewardStructs]; previousStateRewards = new double[numRewardStructs]; @@ -106,10 +111,13 @@ public class PathOnTheFly extends Path // MUTATORS (for Path) @Override - public void initialise(State initialState, double[] initialStateRewards) + public void initialise(State initialState, State initialObs, double[] initialStateRewards) { clear(); currentState.copy(initialState); + if (initialObs != null) { + currentObs.copy(initialObs); + } for (int i = 0; i < numRewardStructs; i++) { currentStateRewards[i] = initialStateRewards[i]; } @@ -118,17 +126,20 @@ public class PathOnTheFly extends Path } @Override - public void addStep(int choice, Object action, String actionString, double probability, double[] transRewards, State newState, double[] newStateRewards, ModelGenerator modelGen) + public void addStep(int choice, Object action, String actionString, double probability, double[] transRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen) { - addStep(1.0, choice, action, actionString, probability, transRewards, newState, newStateRewards, modelGen); + addStep(1.0, choice, action, actionString, probability, transRewards, newState, newObs, newStateRewards, modelGen); } @Override - public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transRewards, State newState, double[] newStateRewards, ModelGenerator modelGen) + public void addStep(double time, int choice, Object action, String actionString, double probability, double[] transRewards, State newState, State newObs, double[] newStateRewards, ModelGenerator modelGen) { size++; previousState.copy(currentState); currentState.copy(newState); + if (newObs != null) { + currentObs.copy(newObs); + } previousAction = action; previousActionString = actionString; previousProbability = probability; @@ -174,6 +185,12 @@ public class PathOnTheFly extends Path return currentState; } + @Override + public State getCurrentObservation() + { + return currentObs; + } + @Override public Object getPreviousAction() { diff --git a/prism/src/simulator/SimulatorEngine.java b/prism/src/simulator/SimulatorEngine.java index 9fc29dcd..de9d8e5d 100644 --- a/prism/src/simulator/SimulatorEngine.java +++ b/prism/src/simulator/SimulatorEngine.java @@ -303,10 +303,12 @@ public class SimulatorEngine extends PrismComponent throw new PrismNotSupportedException("Random choice of multiple initial states not yet supported"); } } + // Get initial observation + State currentObs = modelGen.getObservation(currentState); // Get initial state reward calculateStateRewards(currentState, tmpStateRewards); // Initialise stored path - path.initialise(currentState, tmpStateRewards); + path.initialise(currentState, currentObs, tmpStateRewards); // Explore initial state in model generator computeTransitionsForState(currentState); // Reset and then update samplers for any loaded properties @@ -373,6 +375,7 @@ public class SimulatorEngine extends PrismComponent executeTransition(ref.i, ref.offset, -1); break; case MDP: + case POMDP: // Pick a random choice i = rng.randomUnifInt(modelGen.getNumChoices()); // Pick a random transition from this choice @@ -858,10 +861,12 @@ public class SimulatorEngine extends PrismComponent calculateTransitionRewards(path.getCurrentState(), action, tmpTransitionRewards); // Compute next state currentState.copy(modelGen.computeTransitionTarget(i, offset)); + // Compute observation for new state + State currentObs = modelGen.getObservation(currentState); // Compute state rewards for new state calculateStateRewards(currentState, tmpStateRewards); // Update path - path.addStep(index, action, actionString, p, tmpTransitionRewards, currentState, tmpStateRewards, modelGen); + path.addStep(index, action, actionString, p, tmpTransitionRewards, currentState, currentObs, tmpStateRewards, modelGen); // Explore new state in model generator computeTransitionsForState(currentState); // Update samplers for any loaded properties @@ -896,10 +901,12 @@ public class SimulatorEngine extends PrismComponent calculateTransitionRewards(path.getCurrentState(), action, tmpTransitionRewards); // Compute next state currentState.copy(modelGen.computeTransitionTarget(i, offset)); + // Compute observation for new state + State currentObs = modelGen.getObservation(currentState); // Compute state rewards for new state calculateStateRewards(currentState, tmpStateRewards); // Update path - path.addStep(time, index, action, actionString, p, tmpTransitionRewards, currentState, tmpStateRewards, modelGen); + path.addStep(time, index, action, actionString, p, tmpTransitionRewards, currentState, currentObs, tmpStateRewards, modelGen); // Explore new state in model generator computeTransitionsForState(currentState); // Update samplers for any loaded properties @@ -1278,6 +1285,16 @@ public class SimulatorEngine extends PrismComponent return ((PathFull) path).getState(step); } + /** + * Get the observation at a given step of the path. + * (Not applicable for on-the-fly paths) + * @param step Step index (0 = initial state/step of path) + */ + public State getObservationOfPathStep(int step) + { + return ((PathFull) path).getObservation(step); + } + /** * Get a state reward for the state at a given step of the path. * (Not applicable for on-the-fly paths)