// Guessing game as a POMDP: a hidden value h in {1,2,3} is drawn at random,
// then the agent has up to N attempts to guess it. Only s and g are observable.
pomdp

// Number of guesses allowed. Left undefined here, so a value must be supplied
// when the model is built.
const int N;

module M

	// Control state:
	//   0 = before the toss (default initial value, the lower bound of the range)
	//   1 = guessing
	//   2 = guessed correctly
	//   3 = ran out of guesses
	// The range must include 3 because [timeup] assigns s'=3.
	s : [0..3]; // state
	// Value to be guessed; 0 until the toss. Not listed in the observables below.
	h : [0..3]; // hidden var
	g : [0..N] init N; // num guesses left

	// Draw the hidden value with probabilities 0.1 / 0.3 / 0.6 and start guessing.
	[toss] s=0 -> 0.1:(s'=1)&(h'=1) + 0.3:(s'=1)&(h'=2) + 0.6:(s'=1)&(h'=3);

	// Each guess uses up one attempt; s moves to 2 only if the guess matches h,
	// otherwise s is left unchanged.
	[guess1] s=1&g>0 -> (s'=(h=1)?2:s) & (g'=g-1);
	[guess2] s=1&g>0 -> (s'=(h=2)?2:s) & (g'=g-1);
	[guess3] s=1&g>0 -> (s'=(h=3)?2:s) & (g'=g-1);

	// No attempts left and still guessing: move to the failure state.
	[timeup] s=1&g=0 -> (s'=3);

	// Both terminal states (success s=2 and failure s=3) self-loop, so the
	// model has no deadlock states.
	[loop] s>=2 -> true;

endmodule

// The agent observes the control state and the remaining guesses, but not h.
observables s, g endobservables

label "correct" = s=2;

// One unit of reward per guess action taken.
rewards "guesses"
	[guess1] true : 1;
	[guess2] true : 1;
	[guess3] true : 1;
endrewards