Browse Source

Refactoring + tidying in multi-objective value iteration.

git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@10883 bbc10eb1-c90d-0410-af57-cb519fbb1720
master
Dave Parker 11 years ago
parent
commit
91c8af3aac
  1. 59
      prism/src/sparse/PS_NondetMultiObj.cc
  2. 105
      prism/src/sparse/PS_NondetMultiObjGS.cc

59
prism/src/sparse/PS_NondetMultiObj.cc

@ -112,34 +112,35 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
int start_index;
unsigned int *row_starts1, *predecessors;
unsigned int extra_node;
//in this we will store the result array, 0 returned means error
// storage for result array (0 means error)
jdoubleArray ret = 0;
// local copy of max_iters, since we will change it
int max_iters_local = max_iters;
// Extract some info about objectives
bool has_rewards = _ndsm_r != 0;
bool has_yes_vec = _yes_vec != 0;
//we will change maximal number of iters, so make sure we don't change the global number
int max_iters_local = max_iters;
jsize lenRew = (has_rewards) ? env->GetArrayLength(_ndsm_r) : 0;
jsize lenProb = (has_yes_vec) ? env->GetArrayLength(_yes_vec) : 0;
jlong *ptr_ndsm_r = (has_rewards) ? env->GetLongArrayElements(_ndsm_r, 0) : NULL;
jlong *ptr_yes_vec = (has_yes_vec) ? env->GetLongArrayElements(_yes_vec, 0) : NULL;
double* weights = env->GetDoubleArrayElements(_weights, 0);
int* step_bounds_r = (has_rewards) ? (int*)env->GetIntArrayElements(_ndsm_r_step_bounds, 0) : NULL;
int* step_bounds = (has_yes_vec) ? (int*)env->GetIntArrayElements(_prob_step_bounds, 0) : NULL;
//We will ignore one of the rewards and compute it's value from the other ones and
//from the combined value. We must make sure that this reward has nonzero weight,
//otherwise we can't compute it.
// We will ignore one of the rewards and compute its value from the other ones and
// from the combined value. We must make sure that this reward has nonzero weight,
// otherwise we can't compute it.
int ignoredWeight = -1;
/* HOTFIX: not used for numerical problems
/* HOTFIX: not used for numerical problems
for (i = lenProb + lenRew - 1; i>=0; i--) {
if (weights[i] > 0) {
ignoredWeight = i;
break;
}
}*/
//determine the minimal nonzero weight
double min_weight = 1;
for (i = 0; i < lenProb + lenRew; i++)
@ -171,17 +172,11 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
kb = ndsm->mem;
kbt = kb;
jlong *ptr_ndsm_r = (has_rewards) ? env->GetLongArrayElements(_ndsm_r, 0) : NULL;
jlong *ptr_yes_vec = (has_yes_vec) ? env->GetLongArrayElements(_yes_vec, 0) : NULL;
NDSparseMatrix *ndsm_r[lenRew];
for(int rewi = 0; rewi < lenRew; rewi++)
ndsm_r[rewi] = (NDSparseMatrix *) jlong_to_NDSparseMatrix(ptr_ndsm_r[rewi]);
int* step_bounds_r = (has_rewards) ? (int*)env->GetIntArrayElements(_ndsm_r_step_bounds, 0) : NULL;
int* step_bounds = (has_yes_vec) ? (int*)env->GetIntArrayElements(_prob_step_bounds, 0) : NULL;
int max_step_bound = 0;
for(int rewi = 0; rewi < lenRew; rewi++) {
if (step_bounds_r[rewi] == -1)
@ -217,7 +212,6 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
// create solution/iteration vectors
soln = new double[n];
soln2 = new double[n];
psoln = new double *[lenProb + lenRew];
psoln2 = new double *[lenProb + lenRew];
for (int it = 0; it < lenProb + lenRew ; it++) {
@ -226,7 +220,6 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
psoln2[it] = new double[n];
}
}
pd1 = new double[lenProb + lenRew];
pd2 = new double[lenProb + lenRew];
@ -250,19 +243,17 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
soln[i] = 0;
soln2[i] = 0;
for (int probi = 0; probi < lenProb; probi++) {
if (step_bounds[probi] == max_iters_local)
if (step_bounds[probi] == max_iters_local) {
soln[i] += weights[probi] * yes_vec[probi][i];
//PS_PrintToMainLog(env, "s: %d %d %f %f %f\n", i, probi, soln[i], weights[probi], yes_vec[probi][i]);
}
}
for (int probi = 0; probi < lenProb; probi++) {
if (probi != ignoredWeight) {
if (step_bounds[probi] == max_iters_local) {
//PS_PrintToMainLog(env, "setting %d psoln to some number\n", probi);
psoln[probi][i] = 0;//yes_vec[probi][i];
}
else {
psoln[probi][i] = 0;
//PS_PrintToMainLog(env, "setting %d psoln to 0\n", probi);
}
psoln2[probi][i] = 0;
}
@ -284,11 +275,14 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
for (int it = 0; it < lenRew + lenProb; it++) {
PS_PrintToMainLog(env, "Initial psoln: ");
if (ignoredWeight != it)
if (it != ignoredWeight) {
PS_PrintToMainLog(env, "psoln: ");
for (int o = 0; o < n; o++)
PS_PrintToMainLog(env, "%f, ", psoln[it][o]);
PS_PrintToMainLog(env, "\n");
PS_PrintToMainLog(env, "\n");
} else {
PS_PrintToMainLog(env, "psoln: (ignored)\n");
}
}
#endif
// get setup time
@ -351,14 +345,14 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
// loop through states
for (i = 0; i < n; i++) {
//first, get the decision of the adversary optimizing the combined reward
first = true;
// first, get the decision of the adversary optimizing the combined reward
d1 = -INFINITY;
for (int it = 0; it < lenRew + lenProb; it++)
if (it != ignoredWeight)
pd1[it] = -INFINITY;
first = true; // (because we also remember 'first')
// get pointers to nondeterministic choices for state i
if (!use_counts) { l1 = row_starts[i]; h1 = row_starts[i+1]; }
else { l1 = h1; h1 += row_counts[i]; }
@ -403,8 +397,9 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
// add prob * corresponding reward from previous iteration
// (for both combined and individual rewards)
for (int it = 0; it < lenRew + lenProb; it++) {
if (it != ignoredWeight)
if (it != ignoredWeight) {
pd2[it] += non_zeros[k] * psoln[it][cols[k]];
}
}
d2 += non_zeros[k] * soln[cols[k]];
}

105
prism/src/sparse/PS_NondetMultiObjGS.cc

@ -102,7 +102,7 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
// misc
int i, j, k, l1, h1, l2, h2, iters;
int *k_r = NULL, *l2_r = NULL, *h2_r = NULL;
double d1, d2, kb, kbt;
double d1, d2, max_diff, kb, kbt;
double *pd1 = NULL, *pd2 = NULL;
bool done, weightedDone, first;
const char** action_names;
@ -110,27 +110,24 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
int start_index;
unsigned int *row_starts1, *predecessors;
unsigned int extra_node;
//in this we will store the result array, 0 returned means error
// storage for result array (0 means error)
jdoubleArray ret = 0;
// Extract some info about objectives
bool has_rewards = _ndsm_r != 0;
bool has_yes_vec = _yes_vec != 0;
double max_diff;
double tmp_result;
jsize lenRew = (has_rewards) ? env->GetArrayLength(_ndsm_r) : 0;
jsize lenProb = (has_yes_vec) ? env->GetArrayLength(_yes_vec) : 0;
jlong *ptr_ndsm_r = (has_rewards) ? env->GetLongArrayElements(_ndsm_r, 0) : NULL;
jlong *ptr_yes_vec = (has_yes_vec) ? env->GetLongArrayElements(_yes_vec, 0) : NULL;
double* weights = env->GetDoubleArrayElements(_weights, 0);
//We will ignore one of the rewards and compute it's value from the other ones and
//from the combined value. We must make sure that this reward has nonzero weight,
//otherwise we can't compute it.
// We will ignore one of the rewards and compute its value from the other ones and
// from the combined value. We must make sure that this reward has nonzero weight,
// otherwise we can't compute it.
int ignoredWeight = -1;
/* HOTFIX not used for numerical problems
/* HOTFIX: not used for numerical problems
for (i = lenProb + lenRew - 1; i>=0; i--) {
if (weights[i] > 0) {
ignoredWeight = i;
@ -162,35 +159,28 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
kb = ndsm->mem;
kbt = kb;
jlong *ptr_ndsm_r = (has_rewards) ? env->GetLongArrayElements(_ndsm_r, 0) : NULL;
jlong *ptr_yes_vec = (has_yes_vec) ? env->GetLongArrayElements(_yes_vec, 0) : NULL;
NDSparseMatrix *ndsm_r[lenRew];
for(int rewi = 0; rewi < lenRew; rewi++)
ndsm_r[rewi] = (NDSparseMatrix *) jlong_to_NDSparseMatrix(ptr_ndsm_r[rewi]);
double* weights = env->GetDoubleArrayElements(_weights, 0);
// if needed, and if info is available, build a vector of action indices for the MDP
actions = NULL;
// get vector for yes
yes_vec = new double *[lenProb];
for (int probi = 0; probi < lenProb; probi++)
for (int probi = 0; probi < lenProb; probi++) {
yes_vec[probi] = (double *) jlong_to_ptr(ptr_yes_vec[probi]);
}
kb = n*8.0/1024.0;
kbt += kb;
// create solution/iteration vectors
soln = new double[n];
psoln = new double *[lenProb + lenRew];
for (int it = 0; it < lenProb + lenRew ; it++)
if (it != ignoredWeight)
for (int it = 0; it < lenProb + lenRew ; it++) {
if (it != ignoredWeight) {
psoln[it] = new double[n];
}
}
pd1 = new double[lenProb + lenRew];
pd2 = new double[lenProb + lenRew];
@ -203,20 +193,23 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
// initial solution is yes
for (i = 0; i < n; i++) {
soln[i] = 0;
for (int probi = 0; probi < lenProb; probi++)
for (int probi = 0; probi < lenProb; probi++) {
soln[i] += weights[probi] * yes_vec[probi][i];
for (int probi = 0; probi < lenProb; probi++)
if (probi != ignoredWeight)
}
for (int probi = 0; probi < lenProb; probi++) {
if (probi != ignoredWeight) {
psoln[probi][i] = 0;//yes_vec[probi][i];
for (int rewi = 0; rewi < lenRew; rewi++)
if (lenProb + rewi != ignoredWeight)
}
}
for (int rewi = 0; rewi < lenRew; rewi++) {
if (lenProb + rewi != ignoredWeight) {
psoln[rewi + lenProb][i] = 0;
}
}
}
#ifdef MORE_OUTPUT
PS_PrintToMainLog(env, "soln: ");
PS_PrintToMainLog(env, "Initial soln: ");
for (int o = 0; o < n; o++)
PS_PrintToMainLog(env, "%f, ", soln[o]);
PS_PrintToMainLog(env, "\n");
@ -269,39 +262,44 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
for(int rewi = 0; rewi < lenRew; rewi++)
cols_r[rewi] = ndsm_r[rewi]->cols;
bool doneBeforeBounded;
h2_r = new int[lenRew];
l2_r = new int[lenRew];
k_r = new int[lenRew];
while (!done && iters < max_iters) {
iters++;
max_diff = 0;
// do matrix multiplication and min/max
h1 = h2 = 0;
for (int rewi = 0; rewi < lenRew; rewi++)
h2_r[rewi] = 0;
max_diff = 0;
// loop through states
for (i = 0; i < n; i++) {
first = true;
//first, get the decision of the adversary optimizing the combined reward
// first, get the decision of the adversary optimizing the combined reward
d1 = -INFINITY;
for (int it = 0; it < lenRew + lenProb; it++)
if (it != ignoredWeight)
pd1[it] = -INFINITY;
first = true; // (because we also remember 'first')
// get pointers to nondeterministic choices for state i
if (!use_counts) { l1 = row_starts[i]; h1 = row_starts[i+1]; }
else { l1 = h1; h1 += row_counts[i]; }
// loop through those choices
for (j = l1; j < h1; j++) {
// compute, for state i for this iteration,
// the combined and individual reward values
// start with 0 (we don't have any state rewards)
d2 = 0;
for (int it = 0; it < lenRew + lenProb; it++)
if (it != ignoredWeight)
pd2[it] = 0;
// get pointers to transitions
if (!use_counts) { l2 = choice_starts[j]; h2 = choice_starts[j+1]; }
else { l2 = h2; h2 += choice_counts[j]; }
// and get pointers to transition rewards
for (int rewi = 0; rewi < lenRew; rewi++) {
if (!ndsm_r[rewi]->use_counts) {
l2_r[rewi] = choice_starts_r[rewi][j];
@ -311,12 +309,14 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
h2_r[rewi] += choice_counts_r[rewi][j];
}
}
// loop through transitions
for (k = l2; k < h2; k++) {
// for each reward structure
for (int rewi = 0; rewi < lenRew; rewi++) {
// find corresponding transition reward if any
k_r[rewi] = l2_r[rewi];
while (k_r[rewi] < h2_r[rewi] && cols_r[rewi][k_r[rewi]] != cols[k])
k_r[rewi]++;
// if there is one, add reward * prob to reward value
while (k_r[rewi] < h2_r[rewi] && cols_r[rewi][k_r[rewi]] != cols[k]) k_r[rewi]++;
// if there is one, add reward * prob to combined and individual reward values
if (k_r[rewi] < h2_r[rewi]) {
d2 += weights[rewi + lenProb] * non_zeros_r[rewi][k_r[rewi]] * non_zeros[k];
if (lenProb + rewi != ignoredWeight) {
@ -325,15 +325,16 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
k_r[rewi]++;
}
}
//add value of successors
for (int it = 0; it < lenRew + lenProb; it++)
if (it != ignoredWeight)
// add prob * corresponding reward from previous iteration
// (for both combined and individual rewards)
for (int it = 0; it < lenRew + lenProb; it++) {
if (it != ignoredWeight) {
pd2[it] += non_zeros[k] * psoln[it][cols[k]];
}
}
d2 += non_zeros[k] * soln[cols[k]];
}
// see if the combined reward value is the min/max so far
bool pickThis = first || (min&&(d2<d1)) || (!min&&(d2>d1));
//HOTFIX for cumulative reward
@ -349,6 +350,7 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
}
if (pickThis) {
// store optimal values for combined and individual rewards
if (fabs(d2) < near_zero)
d1 = 0;
else
@ -364,7 +366,7 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
}
first = false;
}
double val_yes = 0.0;
for (int probi = 0; probi < lenProb; probi++) {
if (probi != ignoredWeight && yes_vec[probi]!=NULL);
@ -497,12 +499,11 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet
PS_PrintToMainLog(env, "Iterative method: %d iterations in %.2f seconds (average %.6f, setup %.2f)\n", iters, time_taken, time_for_iters/iters, time_for_setup);
// if the iterative method didn't terminate, this is an error
if (iters == max_iters)
{
if (iters == max_iters) {
PS_SetErrorMessage("Iterative method did not converge within %d iterations.\nConsider using a different numerical method or increasing the maximum number of iterations", iters);
throw 1;
}
//store the result
ret = env->NewDoubleArray(lenProb + lenRew);
jdouble *retNative = env->GetDoubleArrayElements(ret, 0);

Loading…
Cancel
Save