
Rename adversary to strategy in explicit MDP model checking + align with prism-games.

git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@6899 bbc10eb1-c90d-0410-af57-cb519fbb1720
Dave Parker, 13 years ago · branch master · commit 064356d11f
Changed files:
  1. prism/src/explicit/MDP.java (32 lines changed)
  2. prism/src/explicit/MDPExplicit.java (20 lines changed)
  3. prism/src/explicit/MDPModelChecker.java (108 lines changed)
  4. prism/src/explicit/MDPSimple.java (42 lines changed)
  5. prism/src/explicit/MDPSparse.java (65 lines changed)

prism/src/explicit/MDP.java (32 lines changed)

@@ -92,26 +92,26 @@ public interface MDP extends Model
 /**
  * Do a matrix-vector multiplication followed by min/max, i.e. one step of value iteration,
  * i.e. for all s: result[s] = min/max_k { sum_j P_k(s,j)*vect[j] }
- * Optionally, store optimal adversary info.
+ * Optionally, store optimal (memoryless) strategy info.
  * @param vect Vector to multiply by
  * @param min Min or max for (true=min, false=max)
  * @param result Vector to store result in
  * @param subset Only do multiplication for these rows (ignored if null)
  * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
- * @param adv Storage for adversary choice indices (ignored if null)
+ * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
  */
-public void mvMultMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[]);
+public void mvMultMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int strat[]);
 /**
  * Do a single row of matrix-vector multiplication followed by min/max,
  * i.e. return min/max_k { sum_j P_k(s,j)*vect[j] }
- * Optionally, store optimal adversary info.
+ * Optionally, store optimal (memoryless) strategy info.
  * @param s Row index
  * @param vect Vector to multiply by
  * @param min Min or max for (true=min, false=max)
- * @param adv Storage for adversary choice indices (ignored if null)
+ * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
  */
-public double mvMultMinMaxSingle(int s, double vect[], boolean min, int adv[]);
+public double mvMultMinMaxSingle(int s, double vect[], boolean min, int strat[]);
 /**
  * Determine which choices result in min/max after a single row of matrix-vector multiplication.
@@ -166,15 +166,16 @@ public interface MDP extends Model
 /**
  * Do a matrix-vector multiplication and sum of action reward followed by min/max, i.e. one step of value iteration.
  * i.e. for all s: result[s] = min/max_k { rew(s) + sum_j P_k(s,j)*vect[j] }
+ * Optionally, store optimal (memoryless) strategy info.
  * @param vect Vector to multiply by
  * @param mdpRewards The rewards
  * @param min Min or max for (true=min, false=max)
  * @param result Vector to store result in
  * @param subset Only do multiplication for these rows (ignored if null)
  * @param complement If true, {@code subset} is taken to be its complement (ignored if {@code subset} is null)
- * @param adv Storage for adversary choice indices (ignored if null)
+ * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
  */
-public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[]);
+public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int strat[]);
 /**
  * Do a Gauss-Seidel-style matrix-vector multiplication and sum of action reward followed by min/max.
@@ -195,13 +196,14 @@ public interface MDP extends Model
 /**
  * Do a single row of matrix-vector multiplication and sum of action reward followed by min/max.
  * i.e. return min/max_k { rew(s) + sum_j P_k(s,j)*vect[j] }
+ * Optionally, store optimal (memoryless) strategy info.
  * @param s Row index
  * @param vect Vector to multiply by
  * @param mdpRewards The rewards
  * @param min Min or max for (true=min, false=max)
- * @param adv Storage for adversary choice indices (ignored if null)
+ * @param strat Storage for (memoryless) strategy choice indices (ignored if null)
 */
-public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[]);
+public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[]);
 /**
  * Do a single row of Jacobi-style matrix-vector multiplication and sum of action reward followed by min/max.
@@ -224,7 +226,7 @@ public interface MDP extends Model
 public List<Integer> mvMultRewMinMaxSingleChoices(int s, double vect[], MDPRewards mdpRewards, boolean min, double val);
 /**
- * Multiply the probability matrix induced by the mdp and {@code adv}
+ * Multiply the probability matrix induced by the MDP and {@code strat}
  * to the right of {@code source}. Only those entries in {@code source}
  * and only those columns in the probability matrix are considered, that
  * are elements of {@code states}.
@@ -232,14 +234,14 @@ public interface MDP extends Model
  * The result of this multiplication is added to the contents of {@code dest}.
  *
  * @param states States for which to multiply
- * @param adv Strategy to use
+ * @param strat (Memoryless) strategy to use
  * @param source Vector to multiply matrix with
  * @param dest Vector to write result to.
  */
-public void mvMultRight(int[] states, int[] adv, double[] source, double[] dest);
+public void mvMultRight(int[] states, int[] strat, double[] source, double[] dest);
 /**
- * Export to a dot file, highlighting states in 'mark' and choices for a (memoryless) adversary.
+ * Export to a dot file, highlighting states in 'mark' and choices for a (memoryless) strategy.
  */
-public void exportToDotFileWithAdv(String filename, BitSet mark, int adv[]) throws PrismException;
+public void exportToDotFileWithStrat(String filename, BitSet mark, int strat[]) throws PrismException;
 }
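
Usage note (sketch, not from this commit): the renamed interface is typically driven as below, assuming an already-built explicit.MDP `mdp`, a `min` flag and an iteration bound `maxIters` in scope; the convergence test is omitted for brevity.

    int n = mdp.getNumStates();
    double[] soln = new double[n], soln2 = new double[n];
    int[] strat = new int[n];
    java.util.Arrays.fill(strat, -1); // -1 = no strategy choice stored yet
    for (int iters = 0; iters < maxIters; iters++) {
        // soln2[s] = min/max_k { sum_j P_k(s,j)*soln[j] }; argmin/argmax recorded in strat[s]
        mdp.mvMultMinMax(soln, min, soln2, null, false, strat);
        double[] tmp = soln; soln = soln2; soln2 = tmp; // swap vectors for next iteration
    }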

prism/src/explicit/MDPExplicit.java (20 lines changed)

@@ -141,7 +141,7 @@ public abstract class MDPExplicit extends ModelExplicit implements MDP
 }
 @Override
-public void exportToDotFileWithAdv(String filename, BitSet mark, int adv[]) throws PrismException
+public void exportToDotFileWithStrat(String filename, BitSet mark, int strat[]) throws PrismException
 {
 int i, j, numChoices;
 String nij;
@@ -155,7 +155,7 @@ public abstract class MDPExplicit extends ModelExplicit implements MDP
 out.write(i + " [style=filled fillcolor=\"#cccccc\"]\n");
 numChoices = getNumChoices(i);
 for (j = 0; j < numChoices; j++) {
-style = (adv[i] == j) ? ",color=\"#ff0000\",fontcolor=\"#ff0000\"" : "";
+style = (strat[i] == j) ? ",color=\"#ff0000\",fontcolor=\"#ff0000\"" : "";
 action = getAction(i, j);
 nij = "n" + i + "_" + j;
 out.write(i + " -> " + nij + " [ arrowhead=none,label=\"" + j);
@@ -227,19 +227,19 @@ public abstract class MDPExplicit extends ModelExplicit implements MDP
 // Accessors (for MDP)
 @Override
-public void mvMultMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int adv[])
+public void mvMultMinMax(double vect[], boolean min, double result[], BitSet subset, boolean complement, int strat[])
 {
 int s;
 // Loop depends on subset/complement arguments
 if (subset == null) {
 for (s = 0; s < numStates; s++)
-result[s] = mvMultMinMaxSingle(s, vect, min, adv);
+result[s] = mvMultMinMaxSingle(s, vect, min, strat);
 } else if (complement) {
 for (s = subset.nextClearBit(0); s < numStates; s = subset.nextClearBit(s + 1))
-result[s] = mvMultMinMaxSingle(s, vect, min, adv);
+result[s] = mvMultMinMaxSingle(s, vect, min, strat);
 } else {
 for (s = subset.nextSetBit(0); s >= 0; s = subset.nextSetBit(s + 1))
-result[s] = mvMultMinMaxSingle(s, vect, min, adv);
+result[s] = mvMultMinMaxSingle(s, vect, min, strat);
 }
 }
@@ -284,19 +284,19 @@ public abstract class MDPExplicit extends ModelExplicit implements MDP
 }
 @Override
-public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int adv[])
+public void mvMultRewMinMax(double vect[], MDPRewards mdpRewards, boolean min, double result[], BitSet subset, boolean complement, int strat[])
 {
 int s;
 // Loop depends on subset/complement arguments
 if (subset == null) {
 for (s = 0; s < numStates; s++)
-result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv);
+result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, strat);
 } else if (complement) {
 for (s = subset.nextClearBit(0); s < numStates; s = subset.nextClearBit(s + 1))
-result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv);
+result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, strat);
 } else {
 for (s = subset.nextSetBit(0); s >= 0; s = subset.nextSetBit(s + 1))
-result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, adv);
+result[s] = mvMultRewMinMaxSingle(s, vect, mdpRewards, min, strat);
 }
 }
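
Usage note (sketch, not from this commit): once a strategy array has been filled in, the renamed export method can visualise it; `mark` and `strat` are assumed to come from a previous analysis.

    // Highlights states in 'mark' and, in red, the choice strat[i] taken in each state i
    mdp.exportToDotFileWithStrat("mdp.dot", mark, strat);
    // Render with Graphviz, e.g.: dot -Tpdf mdp.dot -o mdp.pdf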

prism/src/explicit/MDPModelChecker.java (108 lines changed)

@@ -321,7 +321,7 @@ public class MDPModelChecker extends ProbModelChecker
 BitSet no, yes;
 int i, n, numYes, numNo;
 long timer, timerProb0, timerProb1;
-boolean genAdv;
+boolean genStrat;
 // Local copy of setting
 MDPSolnMethod mdpSolnMethod = this.mdpSolnMethod;
@@ -339,8 +339,8 @@ public class MDPModelChecker extends ProbModelChecker
 throw new PrismException("Value iteration from above only works for minimum probabilities");
 }
-// Are we generating an optimal adversary?
-genAdv = exportAdv;
+// Are we generating an optimal strategy?
+genStrat = exportAdv;
 // Start probabilistic reachability
 timer = System.currentTimeMillis();
@@ -370,7 +370,7 @@ public class MDPModelChecker extends ProbModelChecker
 }
 timerProb0 = System.currentTimeMillis() - timerProb0;
 timerProb1 = System.currentTimeMillis();
-if (precomp && prob1 && !genAdv) {
+if (precomp && prob1 && !genStrat) {
 yes = prob1(mdp, remain, target, min);
 } else {
 yes = (BitSet) target.clone();
@@ -574,12 +574,12 @@ public class MDPModelChecker extends ProbModelChecker
 BitSet unknown;
 int i, n, iters;
 double soln[], soln2[], tmpsoln[], initVal;
-int adv[] = null;
-boolean genAdv, done;
+int strat[] = null;
+boolean genStrat, done;
 long timer;
-// Are we generating an optimal adversary?
-genAdv = exportAdv;
+// Are we generating an optimal strategy?
+genStrat = exportAdv;
 // Start value iteration
 timer = System.currentTimeMillis();
@@ -617,11 +617,11 @@ public class MDPModelChecker extends ProbModelChecker
 if (known != null)
 unknown.andNot(known);
-// Create/initialise adversary storage
-if (genAdv) {
-adv = new int[n];
+// Create/initialise strategy storage
+if (genStrat) {
+strat = new int[n];
 for (i = 0; i < n; i++) {
-adv[i] = -1;
+strat[i] = -1;
 }
 }
@@ -631,7 +631,7 @@ public class MDPModelChecker extends ProbModelChecker
 while (!done && iters < maxIters) {
 iters++;
 // Matrix-vector multiply and min/max ops
-mdp.mvMultMinMax(soln, min, soln2, unknown, false, genAdv ? adv : null);
+mdp.mvMultMinMax(soln, min, soln2, unknown, false, genStrat ? strat : null);
 // Check termination
 done = PrismUtils.doublesAreClose(soln, soln2, termCritParam, termCrit == TermCrit.ABSOLUTE);
 // Swap vectors for next iter
@@ -652,15 +652,15 @@ public class MDPModelChecker extends ProbModelChecker
 throw new PrismException(msg);
 }
-// Process adversary
-if (genAdv) {
-// Prune adversary
-restrictAdversaryToReachableStates(mdp, adv);
-// Print adversary
+// Export adversary
+if (genStrat && exportAdv) {
+// Prune strategy
+restrictStrategyToReachableStates(mdp, strat);
+// Print strategy
 PrismLog out = new PrismFileLog(exportAdvFilename);
-out.print("Adv:");
+out.print("Strat:");
 for (i = 0; i < n; i++) {
-out.print(" " + i + ":" + adv[i]);
+out.print(" " + i + ":" + strat[i]);
 }
 out.println();
 }
@@ -689,9 +689,13 @@ public class MDPModelChecker extends ProbModelChecker
 BitSet unknown;
 int i, n, iters;
 double soln[], initVal, maxDiff;
-boolean done;
+int strat[] = null;
+boolean genStrat, done;
 long timer;
+// Are we generating an optimal strategy?
+genStrat = exportAdv;
 // Start value iteration
 timer = System.currentTimeMillis();
 mainLog.println("Starting Gauss-Seidel (" + (min ? "min" : "max") + ")...");
@@ -727,6 +731,14 @@ public class MDPModelChecker extends ProbModelChecker
 if (known != null)
 unknown.andNot(known);
+// Create/initialise strategy storage
+if (genStrat) {
+strat = new int[n];
+for (i = 0; i < n; i++) {
+strat[i] = -1;
+}
+}
 // Start iterations
 iters = 0;
 done = false;
@@ -772,11 +784,11 @@ public class MDPModelChecker extends ProbModelChecker
 double soln[], soln2[];
 boolean done;
 long timer;
-int adv[];
+int strat[];
 DTMCModelChecker mcDTMC;
 DTMC dtmc;
-// Re-use solution to solve each new adversary?
+// Re-use solution to solve each new policy (strategy)?
 boolean reUseSoln = true;
 // Start value iteration
@@ -799,18 +811,18 @@ public class MDPModelChecker extends ProbModelChecker
 for (i = 0; i < n; i++)
 soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0;
-// Generate initial adversary
-adv = new int[n];
+// Generate initial strategy
+strat = new int[n];
 for (i = 0; i < n; i++)
-adv[i] = 0;
+strat[i] = 0;
 // Start iterations
 iters = totalIters = 0;
 done = false;
 while (!done) {
 iters++;
-// Solve induced DTMC for adversary
-dtmc = new DTMCFromMDPMemorylessAdversary(mdp, adv);
+// Solve induced DTMC for strategy
+dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
 res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, reUseSoln ? soln : null, null);
 soln = res.soln;
 totalIters += res.numIters;
@@ -818,16 +830,16 @@ public class MDPModelChecker extends ProbModelChecker
 mdp.mvMultMinMax(soln, min, soln2, null, false, null);
 done = true;
 for (i = 0; i < n; i++) {
-// Don't look at no/yes states - we don't store adversary info for them,
+// Don't look at no/yes states - we don't store strategy info for them,
 // so they might appear non-optimal
 if (no.get(i) || yes.get(i))
 continue;
 if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
 done = false;
 List<Integer> opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]);
-// Only update adversary if strictly better
-if (!opt.contains(adv[i]))
-adv[i] = opt.get(0);
+// Only update strategy if strictly better
+if (!opt.contains(strat[i]))
+strat[i] = opt.get(0);
 }
 }
 }
@@ -859,7 +871,7 @@ public class MDPModelChecker extends ProbModelChecker
 double soln[], soln2[];
 boolean done;
 long timer;
-int adv[];
+int strat[];
 DTMCModelChecker mcDTMC;
 DTMC dtmc;
@@ -886,18 +898,18 @@ public class MDPModelChecker extends ProbModelChecker
 for (i = 0; i < n; i++)
 soln[i] = soln2[i] = yes.get(i) ? 1.0 : 0.0;
-// Generate initial adversary
-adv = new int[n];
+// Generate initial strategy
+strat = new int[n];
 for (i = 0; i < n; i++)
-adv[i] = 0;
+strat[i] = 0;
 // Start iterations
 iters = totalIters = 0;
 done = false;
 while (!done) {
 iters++;
-// Solve induced DTMC for adversary
-dtmc = new DTMCFromMDPMemorylessAdversary(mdp, adv);
+// Solve induced DTMC for strategy
+dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
 res = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, soln, null);
 soln = res.soln;
 totalIters += res.numIters;
@@ -905,14 +917,14 @@ public class MDPModelChecker extends ProbModelChecker
 mdp.mvMultMinMax(soln, min, soln2, null, false, null);
 done = true;
 for (i = 0; i < n; i++) {
-// Don't look at no/yes states - we don't store adversary info for them,
+// Don't look at no/yes states - we don't store strategy info for them,
 // so they might appear non-optimal
 if (no.get(i) || yes.get(i))
 continue;
 if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
 done = false;
 List<Integer> opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]);
-adv[i] = opt.get(0);
+strat[i] = opt.get(0);
 }
 }
 }
@@ -1351,12 +1363,12 @@ public class MDPModelChecker extends ProbModelChecker
 }
 /**
- * Restrict a (memoryless) adversary for an MDP, stored as an integer array of choice indices,
- * to the states of the MDP that are reachable under that adversary.
+ * Restrict a (memoryless) strategy for an MDP, stored as an integer array of choice indices,
+ * to the states of the MDP that are reachable under that strategy.
  * @param mdp The MDP
- * @param adv The adversary
+ * @param strat The strategy
  */
-public void restrictAdversaryToReachableStates(MDP mdp, int adv[])
+public void restrictStrategyToReachableStates(MDP mdp, int strat[])
 {
 BitSet restrict = new BitSet();
 BitSet explore = new BitSet();
@@ -1371,8 +1383,8 @@ public class MDPModelChecker extends ProbModelChecker
 foundMore = false;
 for (int s = explore.nextSetBit(0); s >= 0; s = explore.nextSetBit(s + 1)) {
 explore.set(s, false);
-if (adv[s] >= 0) {
-Iterator<Map.Entry<Integer, Double>> iter = mdp.getTransitionsIterator(s, adv[s]);
+if (strat[s] >= 0) {
+Iterator<Map.Entry<Integer, Double>> iter = mdp.getTransitionsIterator(s, strat[s]);
 while (iter.hasNext()) {
 Map.Entry<Integer, Double> e = iter.next();
 int dest = e.getKey();
@@ -1385,10 +1397,10 @@ public class MDPModelChecker extends ProbModelChecker
 }
 }
 }
-// Set adversary choice for non-reachable state to -1
+// Set strategy choice for non-reachable state to -1
 int n = mdp.getNumStates();
 for (int s = restrict.nextClearBit(0); s < n; s = restrict.nextClearBit(s + 1)) {
-adv[s] = -1;
+strat[s] = -1;
 }
 }
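
Usage note (sketch, not from this commit): the policy-iteration loop above, condensed to its core; the names `mcDTMC`, `no`, `yes`, `min`, `termCritParam` and `termCrit` are as in the surrounding code, and error handling is dropped. Each round evaluates the current memoryless strategy by solving its induced DTMC, then improves the strategy wherever a single min/max step finds a strictly better choice.

    int n = mdp.getNumStates();
    int[] strat = new int[n]; // initial strategy: choice 0 in every state
    double[] soln = null, soln2 = new double[n];
    boolean done = false;
    while (!done) {
        // Evaluate the current strategy by solving the DTMC it induces
        DTMC dtmc = new DTMCFromMDPMemorylessAdversary(mdp, strat);
        soln = mcDTMC.computeReachProbsGaussSeidel(dtmc, no, yes, soln, null).soln;
        // One min/max step exposes states where another choice does strictly better
        mdp.mvMultMinMax(soln, min, soln2, null, false, null);
        done = true;
        for (int i = 0; i < n; i++) {
            if (no.get(i) || yes.get(i))
                continue; // no strategy info stored for these states
            if (!PrismUtils.doublesAreClose(soln[i], soln2[i], termCritParam, termCrit == TermCrit.ABSOLUTE)) {
                done = false;
                // Switch choice only if the current one is not already optimal
                List<Integer> opt = mdp.mvMultMinMaxSingleChoices(i, soln, min, soln2[i]);
                if (!opt.contains(strat[i]))
                    strat[i] = opt.get(0);
            }
        }
    }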

prism/src/explicit/MDPSimple.java (42 lines changed)

@@ -607,9 +607,9 @@ public class MDPSimple extends MDPExplicit implements ModelSimple
 }
 @Override
-public double mvMultMinMaxSingle(int s, double vect[], boolean min, int adv[])
+public double mvMultMinMaxSingle(int s, double vect[], boolean min, int strat[])
 {
-int j, k, advCh = -1;
+int j, k, stratCh = -1;
 double d, prob, minmax;
 boolean first;
 List<Distribution> step;
@@ -629,20 +629,20 @@ public class MDPSimple extends MDPExplicit implements ModelSimple
 // Check whether we have exceeded min/max so far
 if (first || (min && d < minmax) || (!min && d > minmax)) {
 minmax = d;
-// If adversary generation is enabled, remember optimal choice
-if (adv != null)
-advCh = j;
+// If strategy generation is enabled, remember optimal choice
+if (strat != null)
+stratCh = j;
 }
 first = false;
 j++;
 }
-// If adversary generation is enabled, store optimal choice
-if (adv != null & !first) {
+// If strategy generation is enabled, store optimal choice
+if (strat != null & !first) {
 // For max, only remember strictly better choices
 if (min) {
-adv[s] = advCh;
-} else if (adv[s] == -1 || minmax > vect[s]) {
-adv[s] = advCh;
+strat[s] = stratCh;
+} else if (strat[s] == -1 || minmax > vect[s]) {
+strat[s] = stratCh;
 }
 }
@@ -760,9 +760,9 @@ public class MDPSimple extends MDPExplicit implements ModelSimple
 }
 @Override
-public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[])
+public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])
 {
-int j, k, advCh = -1;
+int j, k, stratCh = -1;
 double d, prob, minmax;
 boolean first;
 List<Distribution> step;
@@ -783,17 +783,17 @@ public class MDPSimple extends MDPExplicit implements ModelSimple
 // Check whether we have exceeded min/max so far
 if (first || (min && d < minmax) || (!min && d > minmax)) {
 minmax = d;
-// If adversary generation is enabled, remember optimal choice
-if (adv != null)
-advCh = j;
+// If strategy generation is enabled, remember optimal choice
+if (strat != null)
+stratCh = j;
 }
 first = false;
 }
-// If adversary generation is enabled, store optimal choice
-if (adv != null & !first) {
+// If strategy generation is enabled, store optimal choice
+if (strat != null & !first) {
 // Only remember strictly better choices (required for max)
-if (adv[s] == -1 || (min && minmax < vect[s]) || (!min && minmax > vect[s])) {
-adv[s] = advCh;
+if (strat[s] == -1 || (min && minmax < vect[s]) || (!min && minmax > vect[s])) {
+strat[s] = stratCh;
 }
 }
 // Add state reward (doesn't affect min/max)
@@ -875,10 +875,10 @@ public class MDPSimple extends MDPExplicit implements ModelSimple
 }
 @Override
-public void mvMultRight(int[] states, int[] adv, double[] source, double[] dest)
+public void mvMultRight(int[] states, int[] strat, double[] source, double[] dest)
 {
 for (int s : states) {
-Iterator<Entry<Integer, Double>> it = this.getTransitionsIterator(s, adv[s]);
+Iterator<Entry<Integer, Double>> it = this.getTransitionsIterator(s, strat[s]);
 while (it.hasNext()) {
 Entry<Integer, Double> next = it.next();
 int col = next.getKey();
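
Usage note (sketch, not from this commit): a small example of the renamed single-row operation, assuming the MDPSimple/Distribution construction API found elsewhere in the explicit engine.

    MDPSimple mdp = new MDPSimple(2);
    Distribution d0 = new Distribution();
    d0.add(1, 1.0); // choice 0 of state 0: go to state 1 with probability 1
    mdp.addChoice(0, d0);
    Distribution d1 = new Distribution();
    d1.add(0, 0.5);
    d1.add(1, 0.5); // choice 1 of state 0: fair coin between states 0 and 1
    mdp.addChoice(0, d1);
    double[] vect = { 0.0, 1.0 };
    int[] strat = { -1, -1 };
    // min over the two choices: min(1.0, 0.5) = 0.5, so strat[0] becomes 1
    double v = mdp.mvMultMinMaxSingle(0, vect, true, strat);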

prism/src/explicit/MDPSparse.java (65 lines changed)

@@ -27,14 +27,23 @@
 package explicit;
-import java.util.*;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
-import explicit.rewards.MDPRewards;
 import parser.State;
 import prism.PrismException;
 import prism.PrismUtils;
+import explicit.rewards.MDPRewards;
 /**
  * Sparse matrix (non-mutable) explicit-state representation of an MDP.
@@ -384,7 +393,7 @@ public class MDPSparse extends MDPExplicit
 }
 return succs.iterator();
 }
 @Override
 public boolean isSuccessor(int s1, int s2)
 {
@@ -631,9 +640,9 @@ public class MDPSparse extends MDPExplicit
 }
 @Override
-public double mvMultMinMaxSingle(int s, double vect[], boolean min, int adv[])
+public double mvMultMinMaxSingle(int s, double vect[], boolean min, int strat[])
 {
-int j, k, l1, h1, l2, h2, advCh = -1;
+int j, k, l1, h1, l2, h2, stratCh = -1;
 double d, minmax;
 boolean first;
@@ -652,19 +661,19 @@ public class MDPSparse extends MDPExplicit
 // Check whether we have exceeded min/max so far
 if (first || (min && d < minmax) || (!min && d > minmax)) {
 minmax = d;
-// If adversary generation is enabled, remember optimal choice
-if (adv != null)
-advCh = j - l1;
+// If strategy generation is enabled, remember optimal choice
+if (strat != null)
+stratCh = j - l1;
 }
 first = false;
 }
-// If adversary generation is enabled, store optimal choice
-if (adv != null & !first) {
+// If strategy generation is enabled, store optimal choice
+if (strat != null & !first) {
 // For max, only remember strictly better choices
 if (min) {
-adv[s] = advCh;
-} else if (adv[s] == -1 || minmax > vect[s]) {
-adv[s] = advCh;
+strat[s] = stratCh;
+} else if (strat[s] == -1 || minmax > vect[s]) {
+strat[s] = stratCh;
 }
 }
@@ -680,7 +689,7 @@ public class MDPSparse extends MDPExplicit
 // Create data structures to store strategy
 res = new ArrayList<Integer>();
-// One row of matrix-vector operation
+// One row of matrix-vector operation
 l1 = rowStarts[s];
 h1 = rowStarts[s + 1];
 for (j = l1; j < h1; j++) {
@@ -779,9 +788,9 @@ public class MDPSparse extends MDPExplicit
 }
 @Override
-public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int adv[])
+public double mvMultRewMinMaxSingle(int s, double vect[], MDPRewards mdpRewards, boolean min, int strat[])
 {
-int j, k, l1, h1, l2, h2, advCh = -1;
+int j, k, l1, h1, l2, h2, stratCh = -1;
 double d, minmax;
 boolean first;
@@ -800,17 +809,17 @@ public class MDPSparse extends MDPExplicit
 // Check whether we have exceeded min/max so far
 if (first || (min && d < minmax) || (!min && d > minmax)) {
 minmax = d;
-// If adversary generation is enabled, remember optimal choice
-if (adv != null)
-advCh = j - l1;
+// If strategy generation is enabled, remember optimal choice
+if (strat != null)
+stratCh = j - l1;
 }
 first = false;
 }
-// If adversary generation is enabled, store optimal choice
-if (adv != null & !first) {
+// If strategy generation is enabled, store optimal choice
+if (strat != null & !first) {
 // Only remember strictly better choices (required for max)
-if (adv[s] == -1 || (min && minmax < vect[s]) || (!min && minmax > vect[s])) {
-adv[s] = advCh;
+if (strat[s] == -1 || (min && minmax < vect[s]) || (!min && minmax > vect[s])) {
+strat[s] = stratCh;
 }
 }
 // Add state reward (doesn't affect min/max)
@@ -865,7 +874,7 @@ public class MDPSparse extends MDPExplicit
 // Create data structures to store strategy
 res = new ArrayList<Integer>();
-// One row of matrix-vector operation
+// One row of matrix-vector operation
 l1 = rowStarts[s];
 h1 = rowStarts[s + 1];
 for (j = l1; j < h1; j++) {
@@ -886,11 +895,11 @@ public class MDPSparse extends MDPExplicit
 }
 @Override
-public void mvMultRight(int[] states, int[] adv, double[] source, double[] dest)
+public void mvMultRight(int[] states, int[] strat, double[] source, double[] dest)
 {
 for (int s : states) {
 int j, l2, h2;
-int k = adv[s];
+int k = strat[s];
 j = rowStarts[s] + k;
 l2 = choiceStarts[j];
 h2 = choiceStarts[j + 1];
@@ -899,7 +908,7 @@ public class MDPSparse extends MDPExplicit
 }
 }
 }
 // Standard methods
 @Override
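
Usage note (sketch, not from this commit): the sparse layout that mvMultRight traverses, spelled out. `rowStarts` and `choiceStarts` appear in the code above; `cols` and `nonZeros` are assumed to be the parallel column-index and probability arrays of MDPSparse.

    int j = rowStarts[s] + k;         // global index of choice k of state s
    int l2 = choiceStarts[j];         // first transition of that choice...
    int h2 = choiceStarts[j + 1];     // ...and one past the last
    for (int i = l2; i < h2; i++) {
        int col = cols[i];            // successor state (column index)
        dest[col] += nonZeros[i] * source[s]; // accumulate, as in mvMultRight
    }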
