Browse Source

explicit.ZeroRewardECQuotient

Quotient of an MDP where the zero-reward maximal end components
are collapsed to single states.


git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@12123 bbc10eb1-c90d-0410-af57-cb519fbb1720
master
Joachim Klein 9 years ago
parent
commit
265e827391
  1. 178
      prism/src/explicit/ZeroRewardECQuotient.java

178
prism/src/explicit/ZeroRewardECQuotient.java

@ -0,0 +1,178 @@
//==============================================================================
//
// Copyright (c) 2016-
// Authors:
// * Joachim Klein <klein@tcs.inf.tu-dresden.de> (TU Dresden)
//
//------------------------------------------------------------------------------
//
// This file is part of PRISM.
//
// PRISM is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// PRISM is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with PRISM; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//==============================================================================
package explicit;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import common.functions.primitive.PairPredicateInt;
import common.IterableBitSet;
import explicit.graphviz.Decorator;
import explicit.graphviz.ShowRewardDecorator;
import explicit.modelviews.EquivalenceRelationInteger;
import explicit.modelviews.MDPDroppedChoicesCached;
import explicit.modelviews.MDPEquiv;
import explicit.modelviews.MDPEquiv.StateChoicePair;
import explicit.rewards.MDPRewards;
import prism.PrismComponent;
import prism.PrismException;
/**
* Helper class for obtaining the zero-reward EC quotient of an MDP.
* <br>
* In the original MDP, the zero-reward maximal end components are identified, i.e.,
* those end components where there is a strategy to stay infinitely without ever
* seeing another reward.
* <br>
* In the quotient, those zero-reward MECs are each collapsed to a single state,
* with choices that have transitions outside the MEC preserved.
*/
public class ZeroRewardECQuotient
{
private MDPEquiv quotient;
private MDPRewards quotientRewards;
private int numberOfZMECs;
private static final boolean debug = false;
private ZeroRewardECQuotient(MDPEquiv quotient, MDPRewards quotientRewards, int numberOfZMECs)
{
this.quotient = quotient;
this.quotientRewards = quotientRewards;
this.numberOfZMECs = numberOfZMECs;
}
public MDP getModel()
{
return quotient;
}
public MDPRewards getRewards()
{
return quotientRewards;
}
public int getNumberOfZeroRewardMECs()
{
return numberOfZMECs;
}
public BitSet getNonRepresentativeStates()
{
return quotient.getNonRepresentativeStates();
}
public void mapResults(double[] soln) {
for (int s : new IterableBitSet(quotient.getNonRepresentativeStates())) {
int representative = quotient.mapStateToRestrictedModel(s);
soln[s] = soln[representative];
}
}
public static ZeroRewardECQuotient getQuotient(PrismComponent parent, MDP mdp, BitSet restrict, MDPRewards rewards) throws PrismException
{
PairPredicateInt positiveRewardChoice = (int s, int i) -> {
if (rewards.getStateReward(s) > 0)
return true;
if (rewards.getTransitionReward(s, i) > 0) {
return true;
}
return false;
};
// drop positive reward choices
MDPDroppedChoicesCached zeroRewMDP = new MDPDroppedChoicesCached(mdp, positiveRewardChoice);
// compute the MECs in the zero-reward sub-MDP
ECComputer ecComputer = ECComputerDefault.createECComputer(parent, zeroRewMDP);
ecComputer.computeMECStates(restrict);
List<BitSet> mecs = ecComputer.getMECStates();
if (mecs.isEmpty()) {
return null;
}
// the equivalence relation on the states
EquivalenceRelationInteger equiv = new EquivalenceRelationInteger(mecs);
// we drop zero reward loops on the equivalence classes
PairPredicateInt zeroRewardECloop = (int s, int i) -> {
if (positiveRewardChoice.test(s, i)) {
return false;
}
// return true if all successors t of state s for choice i are in the
// same equivalence class
boolean rv = mdp.allSuccessorsMatch(s, i, (int t) -> equiv.test(s,t));
return rv;
};
final MDPDroppedChoicesCached droppedZeroRewardLoops = new MDPDroppedChoicesCached(mdp, zeroRewardECloop);
if (debug)
droppedZeroRewardLoops.exportToDotFile("zero-mec-loops-dropped.dot");
BasicModelTransformation<MDP, MDPEquiv> transform = MDPEquiv.transform(droppedZeroRewardLoops, equiv);
final MDPEquiv quotient = transform.getTransformedModel();
MDPRewards quotientRewards = new MDPRewards() {
@Override
public double getStateReward(int s)
{
return rewards.getStateReward(s);
}
@Override
public double getTransitionReward(int s, int i)
{
StateChoicePair mapped = quotient.mapToOriginalModel(s, i);
int mappedChoiceInOriginal = droppedZeroRewardLoops.mapChoiceToOriginalModel(mapped.getState(), mapped.getChoice());
return rewards.getTransitionReward(mapped.getState(), mappedChoiceInOriginal);
}
@Override
public MDPRewards liftFromModel(Product<? extends Model> product)
{
throw new RuntimeException("Not implemented");
}
@Override
public boolean hasTransitionRewards()
{
return rewards.hasTransitionRewards();
}
};
if (debug) {
List<Decorator> decorators = Arrays.asList(new ShowRewardDecorator(quotientRewards));
quotient.exportToDotFile("zero-mec-quotient.dot", decorators);
}
return new ZeroRewardECQuotient(quotient, quotientRewards, mecs.size());
}
}
Loading…
Cancel
Save