From 91c8af3aac7d4478aa1d93529572d33c1f5b8369 Mon Sep 17 00:00:00 2001 From: Dave Parker Date: Mon, 16 Nov 2015 10:52:15 +0000 Subject: [PATCH] Refactoring + tidying in multi-objective value iteration. git-svn-id: https://www.prismmodelchecker.org/svn/prism/prism/trunk@10883 bbc10eb1-c90d-0410-af57-cb519fbb1720 --- prism/src/sparse/PS_NondetMultiObj.cc | 59 ++++++------- prism/src/sparse/PS_NondetMultiObjGS.cc | 105 ++++++++++++------------ 2 files changed, 80 insertions(+), 84 deletions(-) diff --git a/prism/src/sparse/PS_NondetMultiObj.cc b/prism/src/sparse/PS_NondetMultiObj.cc index a20871f7..43d66566 100644 --- a/prism/src/sparse/PS_NondetMultiObj.cc +++ b/prism/src/sparse/PS_NondetMultiObj.cc @@ -112,34 +112,35 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet int start_index; unsigned int *row_starts1, *predecessors; unsigned int extra_node; - - //in this we will store the result array, 0 returned means error + // storage for result array (0 means error) jdoubleArray ret = 0; + // local copy of max_iters, since we will change it + int max_iters_local = max_iters; + // Extract some info about objectives bool has_rewards = _ndsm_r != 0; bool has_yes_vec = _yes_vec != 0; - - //we will change maximal number of iters, so make sure we don't change the global number - int max_iters_local = max_iters; - jsize lenRew = (has_rewards) ? env->GetArrayLength(_ndsm_r) : 0; jsize lenProb = (has_yes_vec) ? env->GetArrayLength(_yes_vec) : 0; - + jlong *ptr_ndsm_r = (has_rewards) ? env->GetLongArrayElements(_ndsm_r, 0) : NULL; + jlong *ptr_yes_vec = (has_yes_vec) ? env->GetLongArrayElements(_yes_vec, 0) : NULL; double* weights = env->GetDoubleArrayElements(_weights, 0); + int* step_bounds_r = (has_rewards) ? (int*)env->GetIntArrayElements(_ndsm_r_step_bounds, 0) : NULL; + int* step_bounds = (has_yes_vec) ? (int*)env->GetIntArrayElements(_prob_step_bounds, 0) : NULL; - //We will ignore one of the rewards and compute it's value from the other ones and - //from the combined value. We must make sure that this reward has nonzero weight, - //otherwise we can't compute it. + // We will ignore one of the rewards and compute its value from the other ones and + // from the combined value. We must make sure that this reward has nonzero weight, + // otherwise we can't compute it. int ignoredWeight = -1; - - /* HOTFIX: not used for numerical problems + + /* HOTFIX: not used for numerical problems for (i = lenProb + lenRew - 1; i>=0; i--) { if (weights[i] > 0) { ignoredWeight = i; break; } }*/ - + //determine the minimal nonzero weight double min_weight = 1; for (i = 0; i < lenProb + lenRew; i++) @@ -171,17 +172,11 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet kb = ndsm->mem; kbt = kb; - jlong *ptr_ndsm_r = (has_rewards) ? env->GetLongArrayElements(_ndsm_r, 0) : NULL; - jlong *ptr_yes_vec = (has_yes_vec) ? env->GetLongArrayElements(_yes_vec, 0) : NULL; - NDSparseMatrix *ndsm_r[lenRew]; for(int rewi = 0; rewi < lenRew; rewi++) ndsm_r[rewi] = (NDSparseMatrix *) jlong_to_NDSparseMatrix(ptr_ndsm_r[rewi]); - int* step_bounds_r = (has_rewards) ? (int*)env->GetIntArrayElements(_ndsm_r_step_bounds, 0) : NULL; - int* step_bounds = (has_yes_vec) ? (int*)env->GetIntArrayElements(_prob_step_bounds, 0) : NULL; - int max_step_bound = 0; for(int rewi = 0; rewi < lenRew; rewi++) { if (step_bounds_r[rewi] == -1) @@ -217,7 +212,6 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet // create solution/iteration vectors soln = new double[n]; soln2 = new double[n]; - psoln = new double *[lenProb + lenRew]; psoln2 = new double *[lenProb + lenRew]; for (int it = 0; it < lenProb + lenRew ; it++) { @@ -226,7 +220,6 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet psoln2[it] = new double[n]; } } - pd1 = new double[lenProb + lenRew]; pd2 = new double[lenProb + lenRew]; @@ -250,19 +243,17 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet soln[i] = 0; soln2[i] = 0; for (int probi = 0; probi < lenProb; probi++) { - if (step_bounds[probi] == max_iters_local) + if (step_bounds[probi] == max_iters_local) { soln[i] += weights[probi] * yes_vec[probi][i]; - //PS_PrintToMainLog(env, "s: %d %d %f %f %f\n", i, probi, soln[i], weights[probi], yes_vec[probi][i]); + } } for (int probi = 0; probi < lenProb; probi++) { if (probi != ignoredWeight) { if (step_bounds[probi] == max_iters_local) { - //PS_PrintToMainLog(env, "setting %d psoln to some number\n", probi); psoln[probi][i] = 0;//yes_vec[probi][i]; } else { psoln[probi][i] = 0; - //PS_PrintToMainLog(env, "setting %d psoln to 0\n", probi); } psoln2[probi][i] = 0; } @@ -284,11 +275,14 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet for (int it = 0; it < lenRew + lenProb; it++) { - PS_PrintToMainLog(env, "Initial psoln: "); - if (ignoredWeight != it) + if (it != ignoredWeight) { + PS_PrintToMainLog(env, "psoln: "); for (int o = 0; o < n; o++) PS_PrintToMainLog(env, "%f, ", psoln[it][o]); - PS_PrintToMainLog(env, "\n"); + PS_PrintToMainLog(env, "\n"); + } else { + PS_PrintToMainLog(env, "psoln: (ignored)\n"); + } } #endif // get setup time @@ -351,14 +345,14 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet // loop through states for (i = 0; i < n; i++) { - //first, get the decision of the adversary optimizing the combined reward + first = true; + + // first, get the decision of the adversary optimizing the combined reward d1 = -INFINITY; for (int it = 0; it < lenRew + lenProb; it++) if (it != ignoredWeight) pd1[it] = -INFINITY; - first = true; // (because we also remember 'first') - // get pointers to nondeterministic choices for state i if (!use_counts) { l1 = row_starts[i]; h1 = row_starts[i+1]; } else { l1 = h1; h1 += row_counts[i]; } @@ -403,8 +397,9 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet // add prob * corresponding reward from previous iteration // (for both combined and individual rewards) for (int it = 0; it < lenRew + lenProb; it++) { - if (it != ignoredWeight) + if (it != ignoredWeight) { pd2[it] += non_zeros[k] * psoln[it][cols[k]]; + } } d2 += non_zeros[k] * soln[cols[k]]; } diff --git a/prism/src/sparse/PS_NondetMultiObjGS.cc b/prism/src/sparse/PS_NondetMultiObjGS.cc index 91e70c5e..782220ac 100644 --- a/prism/src/sparse/PS_NondetMultiObjGS.cc +++ b/prism/src/sparse/PS_NondetMultiObjGS.cc @@ -102,7 +102,7 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet // misc int i, j, k, l1, h1, l2, h2, iters; int *k_r = NULL, *l2_r = NULL, *h2_r = NULL; - double d1, d2, kb, kbt; + double d1, d2, max_diff, kb, kbt; double *pd1 = NULL, *pd2 = NULL; bool done, weightedDone, first; const char** action_names; @@ -110,27 +110,24 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet int start_index; unsigned int *row_starts1, *predecessors; unsigned int extra_node; - - //in this we will store the result array, 0 returned means error + // storage for result array (0 means error) jdoubleArray ret = 0; + // Extract some info about objectives bool has_rewards = _ndsm_r != 0; bool has_yes_vec = _yes_vec != 0; - - double max_diff; - double tmp_result; - jsize lenRew = (has_rewards) ? env->GetArrayLength(_ndsm_r) : 0; jsize lenProb = (has_yes_vec) ? env->GetArrayLength(_yes_vec) : 0; - + jlong *ptr_ndsm_r = (has_rewards) ? env->GetLongArrayElements(_ndsm_r, 0) : NULL; + jlong *ptr_yes_vec = (has_yes_vec) ? env->GetLongArrayElements(_yes_vec, 0) : NULL; double* weights = env->GetDoubleArrayElements(_weights, 0); - //We will ignore one of the rewards and compute it's value from the other ones and - //from the combined value. We must make sure that this reward has nonzero weight, - //otherwise we can't compute it. + // We will ignore one of the rewards and compute its value from the other ones and + // from the combined value. We must make sure that this reward has nonzero weight, + // otherwise we can't compute it. int ignoredWeight = -1; - /* HOTFIX not used for numerical problems + /* HOTFIX: not used for numerical problems for (i = lenProb + lenRew - 1; i>=0; i--) { if (weights[i] > 0) { ignoredWeight = i; @@ -162,35 +159,28 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet kb = ndsm->mem; kbt = kb; - jlong *ptr_ndsm_r = (has_rewards) ? env->GetLongArrayElements(_ndsm_r, 0) : NULL; - jlong *ptr_yes_vec = (has_yes_vec) ? env->GetLongArrayElements(_yes_vec, 0) : NULL; - NDSparseMatrix *ndsm_r[lenRew]; for(int rewi = 0; rewi < lenRew; rewi++) ndsm_r[rewi] = (NDSparseMatrix *) jlong_to_NDSparseMatrix(ptr_ndsm_r[rewi]); - double* weights = env->GetDoubleArrayElements(_weights, 0); - - // if needed, and if info is available, build a vector of action indices for the MDP - actions = NULL; - // get vector for yes yes_vec = new double *[lenProb]; - for (int probi = 0; probi < lenProb; probi++) + for (int probi = 0; probi < lenProb; probi++) { yes_vec[probi] = (double *) jlong_to_ptr(ptr_yes_vec[probi]); + } kb = n*8.0/1024.0; kbt += kb; // create solution/iteration vectors soln = new double[n]; - psoln = new double *[lenProb + lenRew]; - for (int it = 0; it < lenProb + lenRew ; it++) - if (it != ignoredWeight) + for (int it = 0; it < lenProb + lenRew ; it++) { + if (it != ignoredWeight) { psoln[it] = new double[n]; - + } + } pd1 = new double[lenProb + lenRew]; pd2 = new double[lenProb + lenRew]; @@ -203,20 +193,23 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet // initial solution is yes for (i = 0; i < n; i++) { soln[i] = 0; - for (int probi = 0; probi < lenProb; probi++) + for (int probi = 0; probi < lenProb; probi++) { soln[i] += weights[probi] * yes_vec[probi][i]; - - for (int probi = 0; probi < lenProb; probi++) - if (probi != ignoredWeight) + } + for (int probi = 0; probi < lenProb; probi++) { + if (probi != ignoredWeight) { psoln[probi][i] = 0;//yes_vec[probi][i]; - - for (int rewi = 0; rewi < lenRew; rewi++) - if (lenProb + rewi != ignoredWeight) + } + } + for (int rewi = 0; rewi < lenRew; rewi++) { + if (lenProb + rewi != ignoredWeight) { psoln[rewi + lenProb][i] = 0; + } + } } #ifdef MORE_OUTPUT - PS_PrintToMainLog(env, "soln: "); + PS_PrintToMainLog(env, "Initial soln: "); for (int o = 0; o < n; o++) PS_PrintToMainLog(env, "%f, ", soln[o]); PS_PrintToMainLog(env, "\n"); @@ -269,39 +262,44 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet for(int rewi = 0; rewi < lenRew; rewi++) cols_r[rewi] = ndsm_r[rewi]->cols; - bool doneBeforeBounded; - h2_r = new int[lenRew]; l2_r = new int[lenRew]; k_r = new int[lenRew]; while (!done && iters < max_iters) { - iters++; + max_diff = 0; // do matrix multiplication and min/max h1 = h2 = 0; for (int rewi = 0; rewi < lenRew; rewi++) h2_r[rewi] = 0; - max_diff = 0; + // loop through states for (i = 0; i < n; i++) { + first = true; - //first, get the decision of the adversary optimizing the combined reward + // first, get the decision of the adversary optimizing the combined reward d1 = -INFINITY; for (int it = 0; it < lenRew + lenProb; it++) if (it != ignoredWeight) pd1[it] = -INFINITY; - first = true; // (because we also remember 'first') + // get pointers to nondeterministic choices for state i if (!use_counts) { l1 = row_starts[i]; h1 = row_starts[i+1]; } else { l1 = h1; h1 += row_counts[i]; } + // loop through those choices for (j = l1; j < h1; j++) { + // compute, for state i for this iteration, + // the combined and individual reward values + // start with 0 (we don't have any state rewards) d2 = 0; for (int it = 0; it < lenRew + lenProb; it++) if (it != ignoredWeight) pd2[it] = 0; + // get pointers to transitions if (!use_counts) { l2 = choice_starts[j]; h2 = choice_starts[j+1]; } else { l2 = h2; h2 += choice_counts[j]; } + // and get pointers to transition rewards for (int rewi = 0; rewi < lenRew; rewi++) { if (!ndsm_r[rewi]->use_counts) { l2_r[rewi] = choice_starts_r[rewi][j]; @@ -311,12 +309,14 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet h2_r[rewi] += choice_counts_r[rewi][j]; } } + // loop through transitions for (k = l2; k < h2; k++) { + // for each reward structure for (int rewi = 0; rewi < lenRew; rewi++) { + // find corresponding transition reward if any k_r[rewi] = l2_r[rewi]; - while (k_r[rewi] < h2_r[rewi] && cols_r[rewi][k_r[rewi]] != cols[k]) - k_r[rewi]++; - // if there is one, add reward * prob to reward value + while (k_r[rewi] < h2_r[rewi] && cols_r[rewi][k_r[rewi]] != cols[k]) k_r[rewi]++; + // if there is one, add reward * prob to combined and individual reward values if (k_r[rewi] < h2_r[rewi]) { d2 += weights[rewi + lenProb] * non_zeros_r[rewi][k_r[rewi]] * non_zeros[k]; if (lenProb + rewi != ignoredWeight) { @@ -325,15 +325,16 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet k_r[rewi]++; } } - - //add value of successors - for (int it = 0; it < lenRew + lenProb; it++) - if (it != ignoredWeight) + // add prob * corresponding reward from previous iteration + // (for both combined and individual rewards) + for (int it = 0; it < lenRew + lenProb; it++) { + if (it != ignoredWeight) { pd2[it] += non_zeros[k] * psoln[it][cols[k]]; - + } + } d2 += non_zeros[k] * soln[cols[k]]; } - + // see if the combined reward value is the min/max so far bool pickThis = first || (min&&(d2d1)); //HOTFIX for cumulative reward @@ -349,6 +350,7 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet } if (pickThis) { + // store optimal values for combined and individual rewards if (fabs(d2) < near_zero) d1 = 0; else @@ -364,7 +366,7 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet } first = false; } - + double val_yes = 0.0; for (int probi = 0; probi < lenProb; probi++) { if (probi != ignoredWeight && yes_vec[probi]!=NULL); @@ -497,12 +499,11 @@ JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1Nondet PS_PrintToMainLog(env, "Iterative method: %d iterations in %.2f seconds (average %.6f, setup %.2f)\n", iters, time_taken, time_for_iters/iters, time_for_setup); // if the iterative method didn't terminate, this is an error - if (iters == max_iters) - { + if (iters == max_iters) { PS_SetErrorMessage("Iterative method did not converge within %d iterations.\nConsider using a different numerical method or increasing the maximum number of iterations", iters); throw 1; } - + //store the result ret = env->NewDoubleArray(lenProb + lenRew); jdouble *retNative = env->GetDoubleArrayElements(ret, 0);