Browse Source
explicit.ZeroRewardECQuotient
explicit.ZeroRewardECQuotient
Quotient of an MDP where the zero-reward maximal end components are collapsed to single states. git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@12123 bbc10eb1-c90d-0410-af57-cb519fbb1720master
1 changed files with 178 additions and 0 deletions
@ -0,0 +1,178 @@ |
|||||
|
//============================================================================== |
||||
|
// |
||||
|
// Copyright (c) 2016- |
||||
|
// Authors: |
||||
|
// * Joachim Klein <klein@tcs.inf.tu-dresden.de> (TU Dresden) |
||||
|
// |
||||
|
//------------------------------------------------------------------------------ |
||||
|
// |
||||
|
// This file is part of PRISM. |
||||
|
// |
||||
|
// PRISM is free software; you can redistribute it and/or modify |
||||
|
// it under the terms of the GNU General Public License as published by |
||||
|
// the Free Software Foundation; either version 2 of the License, or |
||||
|
// (at your option) any later version. |
||||
|
// |
||||
|
// PRISM is distributed in the hope that it will be useful, |
||||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of |
||||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
||||
|
// GNU General Public License for more details. |
||||
|
// |
||||
|
// You should have received a copy of the GNU General Public License |
||||
|
// along with PRISM; if not, write to the Free Software Foundation, |
||||
|
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
||||
|
// |
||||
|
//============================================================================== |
||||
|
|
||||
|
package explicit; |
||||
|
|
||||
|
import java.util.Arrays; |
||||
|
import java.util.BitSet; |
||||
|
import java.util.List; |
||||
|
|
||||
|
import common.functions.primitive.PairPredicateInt; |
||||
|
import common.IterableBitSet; |
||||
|
import explicit.graphviz.Decorator; |
||||
|
import explicit.graphviz.ShowRewardDecorator; |
||||
|
import explicit.modelviews.EquivalenceRelationInteger; |
||||
|
import explicit.modelviews.MDPDroppedChoicesCached; |
||||
|
import explicit.modelviews.MDPEquiv; |
||||
|
import explicit.modelviews.MDPEquiv.StateChoicePair; |
||||
|
import explicit.rewards.MDPRewards; |
||||
|
import prism.PrismComponent; |
||||
|
import prism.PrismException; |
||||
|
|
||||
|
/** |
||||
|
* Helper class for obtaining the zero-reward EC quotient of an MDP. |
||||
|
* <br> |
||||
|
* In the original MDP, the zero-reward maximal end components are identified, i.e., |
||||
|
* those end components where there is a strategy to stay infinitely without ever |
||||
|
* seeing another reward. |
||||
|
* <br> |
||||
|
* In the quotient, those zero-reward MECs are each collapsed to a single state, |
||||
|
* with choices that have transitions outside the MEC preserved. |
||||
|
*/ |
||||
|
public class ZeroRewardECQuotient |
||||
|
{ |
||||
|
private MDPEquiv quotient; |
||||
|
private MDPRewards quotientRewards; |
||||
|
private int numberOfZMECs; |
||||
|
|
||||
|
private static final boolean debug = false; |
||||
|
|
||||
|
private ZeroRewardECQuotient(MDPEquiv quotient, MDPRewards quotientRewards, int numberOfZMECs) |
||||
|
{ |
||||
|
this.quotient = quotient; |
||||
|
this.quotientRewards = quotientRewards; |
||||
|
this.numberOfZMECs = numberOfZMECs; |
||||
|
} |
||||
|
|
||||
|
public MDP getModel() |
||||
|
{ |
||||
|
return quotient; |
||||
|
} |
||||
|
|
||||
|
public MDPRewards getRewards() |
||||
|
{ |
||||
|
return quotientRewards; |
||||
|
} |
||||
|
|
||||
|
public int getNumberOfZeroRewardMECs() |
||||
|
{ |
||||
|
return numberOfZMECs; |
||||
|
} |
||||
|
|
||||
|
public BitSet getNonRepresentativeStates() |
||||
|
{ |
||||
|
return quotient.getNonRepresentativeStates(); |
||||
|
} |
||||
|
|
||||
|
public void mapResults(double[] soln) { |
||||
|
for (int s : new IterableBitSet(quotient.getNonRepresentativeStates())) { |
||||
|
int representative = quotient.mapStateToRestrictedModel(s); |
||||
|
soln[s] = soln[representative]; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static ZeroRewardECQuotient getQuotient(PrismComponent parent, MDP mdp, BitSet restrict, MDPRewards rewards) throws PrismException |
||||
|
{ |
||||
|
PairPredicateInt positiveRewardChoice = (int s, int i) -> { |
||||
|
if (rewards.getStateReward(s) > 0) |
||||
|
return true; |
||||
|
if (rewards.getTransitionReward(s, i) > 0) { |
||||
|
return true; |
||||
|
} |
||||
|
return false; |
||||
|
}; |
||||
|
|
||||
|
// drop positive reward choices |
||||
|
MDPDroppedChoicesCached zeroRewMDP = new MDPDroppedChoicesCached(mdp, positiveRewardChoice); |
||||
|
// compute the MECs in the zero-reward sub-MDP |
||||
|
ECComputer ecComputer = ECComputerDefault.createECComputer(parent, zeroRewMDP); |
||||
|
ecComputer.computeMECStates(restrict); |
||||
|
|
||||
|
List<BitSet> mecs = ecComputer.getMECStates(); |
||||
|
|
||||
|
if (mecs.isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
// the equivalence relation on the states |
||||
|
EquivalenceRelationInteger equiv = new EquivalenceRelationInteger(mecs); |
||||
|
|
||||
|
// we drop zero reward loops on the equivalence classes |
||||
|
PairPredicateInt zeroRewardECloop = (int s, int i) -> { |
||||
|
if (positiveRewardChoice.test(s, i)) { |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
// return true if all successors t of state s for choice i are in the |
||||
|
// same equivalence class |
||||
|
boolean rv = mdp.allSuccessorsMatch(s, i, (int t) -> equiv.test(s,t)); |
||||
|
return rv; |
||||
|
}; |
||||
|
|
||||
|
final MDPDroppedChoicesCached droppedZeroRewardLoops = new MDPDroppedChoicesCached(mdp, zeroRewardECloop); |
||||
|
if (debug) |
||||
|
droppedZeroRewardLoops.exportToDotFile("zero-mec-loops-dropped.dot"); |
||||
|
|
||||
|
BasicModelTransformation<MDP, MDPEquiv> transform = MDPEquiv.transform(droppedZeroRewardLoops, equiv); |
||||
|
final MDPEquiv quotient = transform.getTransformedModel(); |
||||
|
|
||||
|
MDPRewards quotientRewards = new MDPRewards() { |
||||
|
@Override |
||||
|
public double getStateReward(int s) |
||||
|
{ |
||||
|
return rewards.getStateReward(s); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public double getTransitionReward(int s, int i) |
||||
|
{ |
||||
|
StateChoicePair mapped = quotient.mapToOriginalModel(s, i); |
||||
|
int mappedChoiceInOriginal = droppedZeroRewardLoops.mapChoiceToOriginalModel(mapped.getState(), mapped.getChoice()); |
||||
|
return rewards.getTransitionReward(mapped.getState(), mappedChoiceInOriginal); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public MDPRewards liftFromModel(Product<? extends Model> product) |
||||
|
{ |
||||
|
throw new RuntimeException("Not implemented"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public boolean hasTransitionRewards() |
||||
|
{ |
||||
|
return rewards.hasTransitionRewards(); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
if (debug) { |
||||
|
List<Decorator> decorators = Arrays.asList(new ShowRewardDecorator(quotientRewards)); |
||||
|
quotient.exportToDotFile("zero-mec-quotient.dot", decorators); |
||||
|
} |
||||
|
|
||||
|
return new ZeroRewardECQuotient(quotient, quotientRewards, mecs.size()); |
||||
|
} |
||||
|
|
||||
|
} |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue