You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

559 lines
17 KiB

//==============================================================================
//
// Copyright (c) 2002-
// Authors:
// * Vojtech Forejt <vojtech.forejt@cs.ox.ac.uk> (University of Oxford)
// * Dave Parker <d.a.parker@cs.bham.ac.uk> (University of Birmingham/Oxford)
//
//------------------------------------------------------------------------------
//
// This file is part of PRISM.
//
// PRISM is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// PRISM is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with PRISM; if not, write to the Free Software Foundation,
// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//==============================================================================
// includes
#include "PrismSparse.h"
#include <math.h>
#include <util.h>
#include <cudd.h>
#include <dd.h>
#include <odd.h>
#include <dv.h>
#include "sparse.h"
#include "prism.h"
#include "PrismNativeGlob.h"
#include "PrismSparseGlob.h"
#include "jnipointer.h"
#include <new>
//The following gives more output on stdout. In fact quite a lot of it, usable only for ~10 state examples
//#define MORE_OUTPUT
//The following number is used to determine when to consider a number equal to 0.
//Will be multiplied by minimal weights to make sure we don't do too much roundoffs for small weights
#define ZERO_ROUNDOFF 10e-11
JNIEXPORT jdoubleArray __jlongpointer JNICALL Java_sparse_PrismSparse_PS_1NondetMultiObjGS
(
JNIEnv *env,
jclass cls,
jlong __jlongpointer od, // odd
jlong __jlongpointer rv, // row vars
jint num_rvars,
jlong __jlongpointer cv, // col vars
jint num_cvars,
jlong __jlongpointer ndv, // nondet vars
jint num_ndvars,
jboolean min, // min or max probabilities (true = min, false = max)
jlong __jlongpointer _start, // initial state(s)
jlong _adversary,
jlong __jlongpointer _ndsm, //pointer to trans sparse matrix
jlongArray _yes_vec, //pointer to yes vector array
jlongArray _ndsm_r, //pointer to reward sparse matrix array
jdoubleArray _weights //weights of rewards and yes_vec vectors
)
{
// cast function parameters
ODDNode *odd = jlong_to_ODDNode(od); // reachable states
DdNode **rvars = jlong_to_DdNode_array(rv); // row vars
DdNode **cvars = jlong_to_DdNode_array(cv); // col vars
DdNode **ndvars = jlong_to_DdNode_array(ndv); // nondet vars
DdNode *start = jlong_to_DdNode(_start); // initial state(s)
// mtbdds
DdNode *a = NULL, *tmp = NULL;
// model stats
int n, nc;
long nnz;
// sparse matrix
NDSparseMatrix *ndsm = NULL;
NDSparseMatrix **ndsm_r = NULL;
// vectors
double **yes_vec = NULL;
double *soln = NULL, *tmpsoln = NULL;
double **psoln = NULL;
// timing stuff
long start1, start2, start3, stop;
double time_taken, time_for_setup, time_for_iters;
// adversary stuff
int export_adv_enabled = export_adv;
FILE *fp_adv = NULL;
int adv_j;
//int *adv = NULL;
// action info
int *actions = NULL;
jstring *action_names_jstrings;
int num_actions;
// misc
int i, j, k, l1, h1, l2, h2, iters;
int *k_r = NULL, *l2_r = NULL, *h2_r = NULL;
double d1, d2, kb, kbt;
double *pd1 = NULL, *pd2 = NULL;
bool done, weightedDone, first;
const char** action_names;
int num_yes = 0;
int start_index;
unsigned int *row_starts1, *predecessors;
unsigned int extra_node;
//in this we will store the result array, 0 returned means error
jdoubleArray ret = 0;
bool has_rewards = _ndsm_r != 0;
bool has_yes_vec = _yes_vec != 0;
double max_diff;
double tmp_result;
jsize lenRew = (has_rewards) ? env->GetArrayLength(_ndsm_r) : 0;
jsize lenProb = (has_yes_vec) ? env->GetArrayLength(_yes_vec) : 0;
double* weights = env->GetDoubleArrayElements(_weights, 0);
//We will ignore one of the rewards and compute it's value from the other ones and
//from the combined value. We must make sure that this reward has nonzero weight,
//otherwise we can't compute it.
int ignoredWeight = -1;
/* HOTFIX not used for numerical problems
for (i = lenProb + lenRew - 1; i>=0; i--) {
if (weights[i] > 0) {
ignoredWeight = i;
break;
}
}*/
//determine the minimal nonzero weight
double min_weight = 1;
for (i = 0; i < lenProb + lenRew; i++)
if (weights[i] > 0 && weights[i] < min_weight)
min_weight = weights[i];
double near_zero = min_weight * ZERO_ROUNDOFF;
// exception handling around whole function
try {
// start clocks
start1 = start2 = util_cpu_time();
// get number of states
n = odd->eoff + odd->toff;
// build sparse matrix
ndsm = (NDSparseMatrix *) jlong_to_NDSparseMatrix(_ndsm);
// get number of transitions/choices
nnz = ndsm->nnz;
nc = ndsm->nc;
kb = ndsm->mem;
kbt = kb;
jlong *ptr_ndsm_r = (has_rewards) ? env->GetLongArrayElements(_ndsm_r, 0) : NULL;
jlong *ptr_yes_vec = (has_yes_vec) ? env->GetLongArrayElements(_yes_vec, 0) : NULL;
NDSparseMatrix *ndsm_r[lenRew];
for(int rewi = 0; rewi < lenRew; rewi++)
ndsm_r[rewi] = (NDSparseMatrix *) jlong_to_NDSparseMatrix(ptr_ndsm_r[rewi]);
double* weights = env->GetDoubleArrayElements(_weights, 0);
// if needed, and if info is available, build a vector of action indices for the MDP
actions = NULL;
// get vector for yes
yes_vec = new double *[lenProb];
for (int probi = 0; probi < lenProb; probi++)
yes_vec[probi] = (double *) jlong_to_ptr(ptr_yes_vec[probi]);
kb = n*8.0/1024.0;
kbt += kb;
// create solution/iteration vectors
soln = new double[n];
psoln = new double *[lenProb + lenRew];
for (int it = 0; it < lenProb + lenRew ; it++)
if (it != ignoredWeight)
psoln[it] = new double[n];
pd1 = new double[lenProb + lenRew];
pd2 = new double[lenProb + lenRew];
kb = n*8.0/1024.0;
kbt += 2*kb;
// Get index of single (first) initial state
start_index = get_index_of_first_from_bdd(ddman, start, rvars, num_rvars, odd);
// initial solution is yes
for (i = 0; i < n; i++) {
soln[i] = 0;
for (int probi = 0; probi < lenProb; probi++)
soln[i] += weights[probi] * yes_vec[probi][i];
for (int probi = 0; probi < lenProb; probi++)
if (probi != ignoredWeight)
psoln[probi][i] = 0;//yes_vec[probi][i];
for (int rewi = 0; rewi < lenRew; rewi++)
if (lenProb + rewi != ignoredWeight)
psoln[rewi + lenProb][i] = 0;
}
#ifdef MORE_OUTPUT
PS_PrintToMainLog(env, "soln: ");
for (int o = 0; o < n; o++)
PS_PrintToMainLog(env, "%f, ", soln[o]);
PS_PrintToMainLog(env, "\n");
for (int it = 0; it < lenRew + lenProb; it++) {
if (it != ignoredWeight) {
PS_PrintToMainLog(env, "psoln: ");
for (int o = 0; o < n; o++)
PS_PrintToMainLog(env, "%f, ", psoln[it][o]);
PS_PrintToMainLog(env, "\n");
} else {
PS_PrintToMainLog(env, "psoln: (ignored)\n");
}
}
#endif
// get setup time
stop = util_cpu_time();
time_for_setup = (double)(stop - start2)/1000;
start2 = stop;
// start iterations
iters = 0;
done = false;
weightedDone = false;
//PS_PrintToMainLog(env, "Starting iterations...\n");
// store local copies of stuff
double *non_zeros = ndsm->non_zeros;
unsigned char *row_counts = ndsm->row_counts;
int *row_starts = (int *)ndsm->row_counts;
unsigned char *choice_counts = ndsm->choice_counts;
int *choice_starts = (int *)ndsm->choice_counts;
bool use_counts = ndsm->use_counts;
unsigned int *cols = ndsm->cols;
double *non_zeros_r[lenRew];
for(int rewi = 0; rewi < lenRew; rewi++)
non_zeros_r[rewi] = ndsm_r[rewi]->non_zeros;
unsigned char *choice_counts_r[lenRew];
for(int rewi = 0; rewi < lenRew; rewi++)
choice_counts_r[rewi] = ndsm_r[rewi]->choice_counts;
int *choice_starts_r[lenRew];
for(int rewi = 0; rewi < lenRew; rewi++)
choice_starts_r[rewi] = (int*)ndsm_r[rewi]->choice_counts;
unsigned int *cols_r[lenRew];
for(int rewi = 0; rewi < lenRew; rewi++)
cols_r[rewi] = ndsm_r[rewi]->cols;
bool doneBeforeBounded;
h2_r = new int[lenRew];
l2_r = new int[lenRew];
k_r = new int[lenRew];
while (!done && iters < max_iters) {
iters++;
// do matrix multiplication and min/max
h1 = h2 = 0;
for (int rewi = 0; rewi < lenRew; rewi++)
h2_r[rewi] = 0;
max_diff = 0;
for (i = 0; i < n; i++) {
//first, get the decision of the adversary optimizing the combined reward
d1 = -INFINITY;
for (int it = 0; it < lenRew + lenProb; it++)
if (it != ignoredWeight)
pd1[it] = -INFINITY;
first = true; // (because we also remember 'first')
if (!use_counts) { l1 = row_starts[i]; h1 = row_starts[i+1]; }
else { l1 = h1; h1 += row_counts[i]; }
for (j = l1; j < h1; j++) {
d2 = 0;
for (int it = 0; it < lenRew + lenProb; it++)
if (it != ignoredWeight)
pd2[it] = 0;
if (!use_counts) { l2 = choice_starts[j]; h2 = choice_starts[j+1]; }
else { l2 = h2; h2 += choice_counts[j]; }
for (int rewi = 0; rewi < lenRew; rewi++) {
if (!ndsm_r[rewi]->use_counts) {
l2_r[rewi] = choice_starts_r[rewi][j];
h2_r[rewi] = choice_starts_r[rewi][j+1];
} else {
l2_r[rewi] = h2_r[rewi];
h2_r[rewi] += choice_counts_r[rewi][j];
}
}
for (k = l2; k < h2; k++) {
for (int rewi = 0; rewi < lenRew; rewi++) {
k_r[rewi] = l2_r[rewi];
while (k_r[rewi] < h2_r[rewi] && cols_r[rewi][k_r[rewi]] != cols[k])
k_r[rewi]++;
// if there is one, add reward * prob to reward value
if (k_r[rewi] < h2_r[rewi]) {
d2 += weights[rewi + lenProb] * non_zeros_r[rewi][k_r[rewi]] * non_zeros[k];
if (lenProb + rewi != ignoredWeight) {
pd2[rewi + lenProb] += non_zeros_r[rewi][k_r[rewi]] * non_zeros[k];
}
k_r[rewi]++;
}
}
//add value of successors
for (int it = 0; it < lenRew + lenProb; it++)
if (it != ignoredWeight)
pd2[it] += non_zeros[k] * psoln[it][cols[k]];
d2 += non_zeros[k] * soln[cols[k]];
}
bool pickThis = first || (min&&(d2<d1)) || (!min&&(d2>d1));
//HOTFIX for cumulative reward
if (!pickThis && (d2==d1)) {
for (int it = 0; it < lenProb + lenRew; it++) {
if (it != ignoredWeight) {
if ((min&&(pd2[it]<pd1[it])) || (!min&&(pd2[it]>pd1[it]))) {
pickThis = true;
break;
}
}
}
}
if (pickThis) {
if (fabs(d2) < near_zero)
d1 = 0;
else
d1 = d2;
for (int it = 0; it < lenRew + lenProb; it++) {
if (it != ignoredWeight) {
if (fabs(pd2[it]) < near_zero)
pd1[it] = 0; //round off small numbers to 0
else
pd1[it] = pd2[it];
}
}
}
first = false;
}
double val_yes = 0.0;
for (int probi = 0; probi < lenProb; probi++) {
if (probi != ignoredWeight && yes_vec[probi]!=NULL);
val_yes += weights[probi] * yes_vec[probi][i];
}
//HOTFIX: it seems that on self loops d1 can be unchanged because the other for cycle is not executed, which is not desirable
if (d1==-INFINITY) {
d1 = 0;
for (int it = 0; it < lenRew + lenProb; it++)
pd1[it] = 0;
}
//TODO: we need to handle val_yes somehow
if (val_yes == 0 || d1>val_yes)
{
switch (term_crit) {
case TERM_CRIT_RELATIVE:
if (fabs((soln[i] - d1)/d1) > max_diff)
max_diff = fabs((soln[i] - d1)/d1);
break;
case TERM_CRIT_ABSOLUTE:
if (fabs(soln[i] - d1) > max_diff)
max_diff = fabs(soln[i] - d1);
break;
}
soln[i] = d1;
for (int it = 0; it < lenRew + lenProb; it++) {
if (it != ignoredWeight) {
switch (term_crit) {
case TERM_CRIT_RELATIVE:
if (weightedDone && fabs((psoln[it][i] - pd1[it])/pd1[it]) > max_diff)
max_diff = fabs((psoln[it][i] - pd1[it])/pd1[it]);
break;
case TERM_CRIT_ABSOLUTE:
if (weightedDone && fabs(psoln[it][i] - pd1[it]) > max_diff)
max_diff = fabs(psoln[it][i] - pd1[it]);
break;
}
psoln[it][i] = pd1[it];
}
}
} else {
double tmpval = 0;
for (int probi = 0; probi < lenProb; probi++)
tmpval += weights[probi] * yes_vec[probi][i];
//NB we don't round tmpval to zero, it should not be
//too small given that it'sonly based on weights and yes_vec
switch (term_crit) {
case TERM_CRIT_RELATIVE:
if (fabs((soln[i] - tmpval)/tmpval) > max_diff)
max_diff = fabs((soln[i] - tmpval)/tmpval);
break;
case TERM_CRIT_ABSOLUTE:
if (fabs(soln[i] - tmpval) > max_diff)
max_diff = fabs(soln[i] - tmpval);
break;
}
soln[i]=tmpval;
for (int probi = 0; probi < lenProb; probi++) {
if (probi != ignoredWeight) {
switch (term_crit) {
case TERM_CRIT_RELATIVE:
if (weightedDone && fabs((psoln[probi][i] - yes_vec[probi][i])/yes_vec[probi][i]) > max_diff)
max_diff = fabs((psoln[probi][i] - yes_vec[probi][i])/yes_vec[probi][i]);
break;
case TERM_CRIT_ABSOLUTE:
if (weightedDone && fabs(psoln[probi][i] - yes_vec[probi][i]) > max_diff)
max_diff = fabs(psoln[probi][i] - yes_vec[probi][i]);
break;
}
psoln[probi][i] = yes_vec[probi][i];
}
}
for (int rewi = 0; rewi < lenRew; rewi++) {
if (lenProb + rewi != ignoredWeight)
psoln[rewi + lenProb][i] = 0;
}
}
}
#ifdef MORE_OUTPUT
PS_PrintToMainLog(env, "soln: ");
for (int o = 0; o < n; o++)
PS_PrintToMainLog(env, "%e, ", soln[o]);
PS_PrintToMainLog(env, "\n");
for (int it = 0; it < lenRew + lenProb; it++) {
if (it != ignoredWeight) {
PS_PrintToMainLog(env, "psoln: ");
for (int o = 0; o < n; o++)
PS_PrintToMainLog(env, "%e, ", psoln[it][o]);
PS_PrintToMainLog(env, "\n");
} else {
PS_PrintToMainLog(env, "psoln: (ignored)\n");
}
}
#endif
// check convergence
// (note: doing outside loop means may not need to check all elements)
if (!weightedDone) {
if (max_diff <= term_crit_param) {
weightedDone = true;
}
} else {
if (max_diff <= term_crit_param) {
done = true;
}
}
}
// stop clocks
stop = util_cpu_time();
time_for_iters = (double)(stop - start2)/1000;
time_taken = (double)(stop - start1)/1000;
// print iterations/timing info
PS_PrintToMainLog(env, "Iterative method: %d iterations in %.2f seconds (average %.6f, setup %.2f)\n", iters, time_taken, time_for_iters/iters, time_for_setup);
// if the iterative method didn't terminate, this is an error
if (iters == max_iters)
{
PS_SetErrorMessage("Iterative method did not converge within %d iterations.\nConsider using a different numerical method or increasing the maximum number of iterations", iters);
throw 1;
}
//store the result
ret = env->NewDoubleArray(lenProb + lenRew);
jdouble *retNative = env->GetDoubleArrayElements(ret, 0);
//copy all computed elements
for (int it = 0; it < lenRew + lenProb; it++)
if (it != ignoredWeight)
retNative[it] = max_double_vector_over_bdd(ddman, psoln[it], start, rvars, num_rvars, odd);
//compute the last element
if (ignoredWeight != -1) {
double last = max_double_vector_over_bdd(ddman, soln, start, rvars, num_rvars, odd);
for (int it = 0; it < lenRew + lenProb; it++) {
if (it != ignoredWeight) {
last -= weights[it] * retNative[it];
}
}
retNative[ignoredWeight] = (weights[ignoredWeight] > 0) ? (last / weights[ignoredWeight]) : 0.0;
}
env->ReleaseDoubleArrayElements(ret, retNative, 0);
// catch exceptions: register error, free memory
} catch (std::bad_alloc e) {
PS_SetErrorMessage("Out of memory");
if (soln) delete[] soln;
soln = 0;
} catch (int e) {
if (e==1) //1 means error was set above and exception was thrown to end the computation
ret = 0;
else
PS_SetErrorMessage("Unknown error.");
}
if (soln) delete[] soln;
if (yes_vec) delete[] yes_vec;
if (h2_r) delete[] h2_r;
if (l2_r) delete[] l2_r;
if (k_r) delete[] k_r;
if (pd1) delete[] pd1;
if (pd2) delete[] pd2;
for (int it = 0; it < lenProb + lenRew; it++) {
if (it != ignoredWeight)
if (psoln && psoln[it]) delete[] psoln[it];
}
if (psoln) delete[] psoln;
if (actions != NULL) {
delete[] actions;
release_string_array_from_java(env, action_names_jstrings, action_names, num_actions);
}
return ret;
}
//------------------------------------------------------------------------------