explicit.ZeroRewardECQuotient

Quotient of an MDP where the zero-reward maximal end components are collapsed to single states. git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@12123 bbc10eb1-c90d-0410-af57-cb519fbb1720
9 years ago · 265e827391
1 changed files with 178 additions and 0 deletions
--- a/prism/src/explicit/ZeroRewardECQuotient.java
+++ b/prism/src/explicit/ZeroRewardECQuotient.java
@@ -0,0 +1,178 @@
+//==============================================================================
+//	
+//	Copyright (c) 2016-
+//	Authors:
+//	* Joachim Klein <klein@tcs.inf.tu-dresden.de> (TU Dresden)
+//	
+//------------------------------------------------------------------------------
+//	
+//	This file is part of PRISM.
+//	
+//	PRISM is free software; you can redistribute it and/or modify
+//	it under the terms of the GNU General Public License as published by
+//	the Free Software Foundation; either version 2 of the License, or
+//	(at your option) any later version.
+//	
+//	PRISM is distributed in the hope that it will be useful,
+//	but WITHOUT ANY WARRANTY; without even the implied warranty of
+//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//	GNU General Public License for more details.
+//	
+//	You should have received a copy of the GNU General Public License
+//	along with PRISM; if not, write to the Free Software Foundation,
+//	Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//	
+//==============================================================================
+
+package explicit;
+
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.List;
+
+import common.functions.primitive.PairPredicateInt;
+import common.IterableBitSet;
+import explicit.graphviz.Decorator;
+import explicit.graphviz.ShowRewardDecorator;
+import explicit.modelviews.EquivalenceRelationInteger;
+import explicit.modelviews.MDPDroppedChoicesCached;
+import explicit.modelviews.MDPEquiv;
+import explicit.modelviews.MDPEquiv.StateChoicePair;
+import explicit.rewards.MDPRewards;
+import prism.PrismComponent;
+import prism.PrismException;
+
+/**
+ * Helper for building the zero-reward EC quotient of an MDP.
+ * <br>
+ * The zero-reward maximal end components (MECs) are computed as the MECs of the
+ * sub-MDP that remains after dropping every choice with a positive state or transition reward.
+ * <br>
+ * In the quotient, each zero-reward MEC forms one equivalence class; zero-reward
+ * choices whose successors all stay in the class of their source state are dropped.
+ */
+public class ZeroRewardECQuotient
+{
+	/** Enables the Dot file exports in {@link #getQuotient}. */
+	private static final boolean debug = false;
+
+	/** The quotient model. */
+	private final MDPEquiv quotient;
+	/** The rewards for the quotient model. */
+	private final MDPRewards quotientRewards;
+	/** Number of zero-reward MECs computed for the original MDP. */
+	private final int numberOfZMECs;
+
+	/** Instances are only created by {@link #getQuotient}. */
+	private ZeroRewardECQuotient(MDPEquiv quotient, MDPRewards quotientRewards, int numberOfZMECs)
+	{
+		this.quotient = quotient;
+		this.quotientRewards = quotientRewards;
+		this.numberOfZMECs = numberOfZMECs;
+	}
+
+	/** Get the quotient model. */
+	public MDP getModel()
+	{
+		return quotient;
+	}
+
+	/** Get the rewards for the quotient model. */
+	public MDPRewards getRewards()
+	{
+		return quotientRewards;
+	}
+
+	/** Get the number of zero-reward MECs that were found. */
+	public int getNumberOfZeroRewardMECs()
+	{
+		return numberOfZMECs;
+	}
+
+	/** Get the non-representative states, as reported by the quotient model. */
+	public BitSet getNonRepresentativeStates()
+	{
+		return quotient.getNonRepresentativeStates();
+	}
+
+	/** Copy (in place) to each non-representative state s the {@code soln} entry of the state the quotient maps s to. */
+	public void mapResults(double[] soln)
+	{
+		final BitSet nonRep = quotient.getNonRepresentativeStates();
+		for (int s = nonRep.nextSetBit(0); s >= 0; s = nonRep.nextSetBit(s + 1)) {
+			final int target = quotient.mapStateToRestrictedModel(s);
+			soln[s] = soln[target];
+		}
+	}
+
+	/**
+	 * Compute the zero-reward EC quotient of {@code mdp} for the given rewards.
+	 * @param parent passed on to the EC computer factory
+	 * @param mdp the original MDP
+	 * @param restrict passed to {@code computeMECStates} when computing the MECs
+	 * @param rewards the rewards for {@code mdp}
+	 * @return the quotient, or {@code null} if no zero-reward MEC was found
+	 */
+	public static ZeroRewardECQuotient getQuotient(PrismComponent parent, MDP mdp, BitSet restrict, MDPRewards rewards) throws PrismException
+	{
+		// positive reward choice: positive state reward or positive transition reward
+		final PairPredicateInt positiveRewardChoice = (s, i) -> rewards.getStateReward(s) > 0 || rewards.getTransitionReward(s, i) > 0;
+		// MECs of the sub-MDP without the positive reward choices
+		final MDPDroppedChoicesCached zeroRewMDP = new MDPDroppedChoicesCached(mdp, positiveRewardChoice);
+		final ECComputer ecComputer = ECComputerDefault.createECComputer(parent, zeroRewMDP);
+		ecComputer.computeMECStates(restrict);
+		final List<BitSet> mecs = ecComputer.getMECStates();
+		if (mecs.isEmpty()) {
+			return null;
+		}
+
+		// equivalence relation on the states, built from the MECs
+		final EquivalenceRelationInteger equiv = new EquivalenceRelationInteger(mecs);
+
+		// drop the zero-reward choices of s whose successors are all equivalent to s
+		final PairPredicateInt zeroRewardECloop = (s, i) -> !positiveRewardChoice.test(s, i) && mdp.allSuccessorsMatch(s, i, (int t) -> equiv.test(s, t));
+		final MDPDroppedChoicesCached droppedZeroRewardLoops = new MDPDroppedChoicesCached(mdp, zeroRewardECloop);
+		if (debug) {
+			droppedZeroRewardLoops.exportToDotFile("zero-mec-loops-dropped.dot");
+		}
+
+		final BasicModelTransformation<MDP, MDPEquiv> transformation = MDPEquiv.transform(droppedZeroRewardLoops, equiv);
+		final MDPEquiv quotient = transformation.getTransformedModel();
+
+		final MDPRewards quotientRewards = new MDPRewards() {
+			@Override
+			public boolean hasTransitionRewards()
+			{
+				return rewards.hasTransitionRewards();
+			}
+
+			@Override
+			public double getStateReward(int s)
+			{
+				return rewards.getStateReward(s);
+			}
+
+			@Override
+			public double getTransitionReward(int s, int i)
+			{
+				// quotient choice -> choice in droppedZeroRewardLoops -> choice in the original MDP
+				final StateChoicePair inDropped = quotient.mapToOriginalModel(s, i);
+				final int choice = droppedZeroRewardLoops.mapChoiceToOriginalModel(inDropped.getState(), inDropped.getChoice());
+				return rewards.getTransitionReward(inDropped.getState(), choice);
+			}
+
+			@Override
+			public MDPRewards liftFromModel(Product<? extends Model> product)
+			{
+				throw new RuntimeException("Not implemented");
+			}
+		};
+
+		if (debug) {
+			final List<Decorator> decorators = Arrays.asList(new ShowRewardDecorator(quotientRewards));
+			quotient.exportToDotFile("zero-mec-quotient.dot", decorators);
+		}
+
+		return new ZeroRewardECQuotient(quotient, quotientRewards, mecs.size());
+	}
+}