//==============================================================================
//
//	Copyright (c) 2016-
//	Authors:
//	* Joachim Klein (TU Dresden)
//
//------------------------------------------------------------------------------
//
//	This file is part of PRISM.
//
//	PRISM is free software; you can redistribute it and/or modify
//	it under the terms of the GNU General Public License as published by
//	the Free Software Foundation; either version 2 of the License, or
//	(at your option) any later version.
//
//	PRISM is distributed in the hope that it will be useful,
//	but WITHOUT ANY WARRANTY; without even the implied warranty of
//	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//	GNU General Public License for more details.
//
//	You should have received a copy of the GNU General Public License
//	along with PRISM; if not, write to the Free Software Foundation,
//	Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//==============================================================================

package explicit;

import java.util.Arrays;
import java.util.BitSet;
import java.util.List;

import common.functions.primitive.PairPredicateInt;
import common.IterableBitSet;
import explicit.graphviz.Decorator;
import explicit.graphviz.ShowRewardDecorator;
import explicit.modelviews.EquivalenceRelationInteger;
import explicit.modelviews.MDPDroppedChoicesCached;
import explicit.modelviews.MDPEquiv;
import explicit.modelviews.MDPEquiv.StateChoicePair;
import explicit.rewards.MDPRewards;
import prism.PrismComponent;
import prism.PrismException;

/**
 * Helper class for obtaining the zero-reward EC quotient of an MDP.
 *
 * In the original MDP, the zero-reward maximal end components (MECs) are identified,
 * i.e., those end components in which there is a strategy for remaining inside the
 * component forever without ever accumulating any further reward.
 *
 * In the quotient, each of these zero-reward MECs is collapsed into a single state;
 * choices that have transitions leaving the MEC are preserved.
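 *
 * A minimal usage sketch (illustrative only; {@code parent}, {@code mdp}, {@code restrict}
 * and {@code rewards} are assumed to be provided by the caller, and {@code computeValues}
 * stands for whatever value computation is run on the quotient model):
 * <pre>{@code
 * ZeroRewardECQuotient q = ZeroRewardECQuotient.getQuotient(parent, mdp, restrict, rewards);
 * if (q == null) {
 *     // no zero-reward MECs, analyse the original MDP directly
 * } else {
 *     double[] soln = computeValues(q.getModel(), q.getRewards());
 *     // copy the result of each representative state to the collapsed states
 *     q.mapResults(soln);
 * }
 * }</pre>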
 */
public class ZeroRewardECQuotient
{
	private MDPEquiv quotient;
	private MDPRewards quotientRewards;
	private int numberOfZMECs;

	private static final boolean debug = false;

	private ZeroRewardECQuotient(MDPEquiv quotient, MDPRewards quotientRewards, int numberOfZMECs)
	{
		this.quotient = quotient;
		this.quotientRewards = quotientRewards;
		this.numberOfZMECs = numberOfZMECs;
	}

	public MDP getModel()
	{
		return quotient;
	}

	public MDPRewards getRewards()
	{
		return quotientRewards;
	}

	public int getNumberOfZeroRewardMECs()
	{
		return numberOfZMECs;
	}

	public BitSet getNonRepresentativeStates()
	{
		return quotient.getNonRepresentativeStates();
	}

	public void mapResults(double[] soln)
	{
		for (int s : new IterableBitSet(quotient.getNonRepresentativeStates())) {
			int representative = quotient.mapStateToRestrictedModel(s);
			soln[s] = soln[representative];
		}
	}

	public static ZeroRewardECQuotient getQuotient(PrismComponent parent, MDP mdp, BitSet restrict, MDPRewards rewards) throws PrismException
	{
		PairPredicateInt positiveRewardChoice = (int s, int i) -> {
			if (rewards.getStateReward(s) > 0)
				return true;
			if (rewards.getTransitionReward(s, i) > 0) {
				return true;
			}
			return false;
		};

		// drop positive reward choices
		MDPDroppedChoicesCached zeroRewMDP = new MDPDroppedChoicesCached(mdp, positiveRewardChoice);

		// compute the MECs in the zero-reward sub-MDP
		ECComputer ecComputer = ECComputerDefault.createECComputer(parent, zeroRewMDP);
		ecComputer.computeMECStates(restrict);
		List<BitSet> mecs = ecComputer.getMECStates();

		if (mecs.isEmpty()) {
			return null;
		}

		// the equivalence relation on the states
		EquivalenceRelationInteger equiv = new EquivalenceRelationInteger(mecs);

		// drop choices that form zero-reward loops within an equivalence class
		PairPredicateInt zeroRewardECloop = (int s, int i) -> {
			if (positiveRewardChoice.test(s, i)) {
				return false;
			}
			// return true if all successors t of state s for choice i are in the
			// same equivalence class
			boolean rv = mdp.allSuccessorsMatch(s, i, (int t) -> equiv.test(s, t));
			return rv;
		};
		final MDPDroppedChoicesCached droppedZeroRewardLoops = new MDPDroppedChoicesCached(mdp, zeroRewardECloop);
		if (debug)
			droppedZeroRewardLoops.exportToDotFile("zero-mec-loops-dropped.dot");

		// build the quotient by identifying the states of each zero-reward MEC
		BasicModelTransformation<MDP, MDPEquiv> transform = MDPEquiv.transform(droppedZeroRewardLoops, equiv);
		final MDPEquiv quotient = transform.getTransformedModel();

		// rewards for the quotient: state rewards carry over unchanged,
		// transition rewards are looked up via the corresponding choice in the original MDP
		MDPRewards quotientRewards = new MDPRewards() {
			@Override
			public double getStateReward(int s)
			{
				return rewards.getStateReward(s);
			}

			@Override
			public double getTransitionReward(int s, int i)
			{
				StateChoicePair mapped = quotient.mapToOriginalModel(s, i);
				int mappedChoiceInOriginal = droppedZeroRewardLoops.mapChoiceToOriginalModel(mapped.getState(), mapped.getChoice());
				return rewards.getTransitionReward(mapped.getState(), mappedChoiceInOriginal);
			}

			@Override
			public MDPRewards liftFromModel(Product product)
			{
				throw new RuntimeException("Not implemented");
			}

			@Override
			public boolean hasTransitionRewards()
			{
				return rewards.hasTransitionRewards();
			}
		};

		if (debug) {
			List<Decorator> decorators = Arrays.asList(new ShowRewardDecorator(quotientRewards));
			quotient.exportToDotFile("zero-mec-quotient.dot", decorators);
		}

		return new ZeroRewardECQuotient(quotient, quotientRewards, mecs.size());
	}
}