If there are several actions with the same reward, then one of the actions is chosen randomly.
The GetEnabledActions method is defined in the basic strategy; it enumerates all
the actions with the given action symbols that are enabled in the current state.
partial class CustomStrategy
{
    /// <summary>
    /// Picks an action to explore next from the enabled actions that carry
    /// one of the given action symbols, preferring the highest reward as
    /// reported by GetReward.
    /// </summary>
    /// <param name="actionSymbols">Action symbols passed on to GetEnabledActions.</param>
    /// <returns>
    /// One member of the set of top-reward actions, or null when that set is
    /// empty. Because the running maximum starts at 0, an action whose reward
    /// is below 0 is never collected and therefore never returned.
    /// </returns>
    public override Action SelectAction(Set
    actionSymbols)
    {
        double topReward = 0;
        Set topActions = Set.EmptySet;

        foreach (Action candidate in GetEnabledActions(actionSymbols))
        {
            double candidateReward = GetReward(candidate);

            if (candidateReward > topReward)
            {
                // Strictly better: discard earlier ties and start a new set.
                topReward = candidateReward;
                topActions = new Set(candidate);
            }
            else if (candidateReward == topReward)
            {
                // Tie with the current best: keep it as another option.
                topActions = topActions.Add(candidate);
            }
        }

        if (topActions.IsEmpty)
        {
            return null;
        }

        // Several actions may share the top reward; Choose() settles on one.
        return topActions.Choose();
    }
}
The idea behind the reward function is that it maps each enabled action in the
current state to a numeric real value that characterizes how "rewarding" it would
be to explore that action. Suppose that we wish to reward those actions more that
either cover new coverage points or cover more coverage points. We can calculate
a reward value as follows:
partial class CustomStrategy
{
double GetReward(Action a)
{
double reward = 0;
Bag cps = GetCoverage(CurrentState, a);
foreach (Term cp in cps)
{
int newC = cps.CountItem(cp);
int oldC = coveragePoints.
Pages:
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280