-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbot_belief_1.pyx
110 lines (97 loc) · 4.13 KB
/
bot_belief_1.pyx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
Holds a belief for a single float value.
The belief is updated linearly, as a function of the current state and the last
belief. Actions are also evaluated linearly, with a coefficient for the belief.
The belief lag and belief trust meta-parameters influence the generation of
the coefficients. Belief lag weakens the dependence of the belief on the state:
a value of 0.1 would make the belief update based on the state 10 times smaller
than the belief update based on the previous belief. Similarly, belief trust
weakens the impact of the belief on action evaluation. Both are drawn from the
"unit" distribution when generating new parameters.
Assumes 4 actions.
"""
from cython import ccall, cclass, locals, returns
from bot_base cimport BaseBot
from interface cimport c_do_action, c_get_state
@cclass
class Bot(BaseBot):
    """Linear bot that carries one float belief from step to step.

    Each step scores four actions as a linear function of the current state
    and the belief, performs the best-scoring action, and then recomputes
    the belief linearly from its previous value and the state.
    """

    @staticmethod
    def shapes(steps, actions, features):
        """Return the shape of every parameter array, keyed by name.

        `steps` is accepted but not used by this bot.
        """
        per_action = (actions,)
        scalar = (1,)
        return {
            # Action-evaluation coefficients.
            'free': per_action,
            'state0l': (actions, features),
            'belief0l': per_action,
            # Belief-update coefficients.
            'belief_free': scalar,
            'belief_state0l': (features,),
            'belief_belief0l': scalar,
        }

    def __cinit__(self, *args, **kwargs):
        """Start with a zero belief."""
        self.belief = 0

    @ccall
    @returns('Bot')
    @locals(state='bint', twin='Bot')
    def clone(self, state=True):
        """Clone through BaseBot.clone; copy the belief too when `state` is set."""
        twin = BaseBot.clone(self, state)
        if state:
            twin.belief = self.belief
        return twin

    @ccall
    @returns('dict')
    @locals(dists='dict', emphases='tuple',
            trust='float', lag='float',
            multipliers='dict', params='dict')
    def new_params(self, dists, emphases):
        """Draw belief trust and lag, then generate a fresh parameter dict.

        Both meta-parameters are sampled from dists['unit'] and written into
        self.param_multipliers before BaseBot.new_params is called
        (presumably the base implementation scales the generated
        coefficients by them; confirm against BaseBot). The sampled values
        are also stored in the result under '_belief_trust' and
        '_belief_lag'.
        """
        # Draw order matters for reproducibility: trust first, then lag.
        trust = dists['unit'].rvs()
        lag = dists['unit'].rvs()

        multipliers = self.param_multipliers
        multipliers['belief0l'] = trust
        multipliers['belief_state0l'] = lag
        # NOTE(review): the module docstring describes lag as damping only
        # the state term of the belief update, yet the belief-to-belief
        # coefficient is scaled by trust * lag here; confirm this is intended.
        multipliers['belief_belief0l'] = trust * lag

        params = BaseBot.new_params(self, dists, emphases)
        params['_belief_trust'] = trust
        params['_belief_lag'] = lag
        return params

    @ccall
    @returns('void')
    @locals(steps='int', step='int', action='int', option='int',
            features='int', feature='int',
            free='float[4]', state0l='float[:, ::1]', belief0l='float[4]',
            belief_free='float', belief_state0l='float[::1]',
            belief_belief0l='float', belief='float',
            scores='float[4]', observation='float*', signal='float')
    def act(self, steps):
        """Run `steps` iterations of evaluate, act, and update the belief.

        On return self.belief holds the final belief and self.last_action
        the last action performed; last_action is -1 if the loop never ran.
        """
        features = self.level['features']
        free = self.params['free']
        state0l = self.params['state0l']
        belief0l = self.params['belief0l']
        belief_free = self.params['belief_free']
        belief_state0l = self.params['belief_state0l']
        belief_belief0l = self.params['belief_belief0l']
        belief = self.belief
        action = -1

        for step in range(steps):
            # Constant term plus belief contribution for each of the 4 actions.
            for option in range(4):
                scores[option] = free[option] + belief0l[option] * belief

            # Add the linear contribution of every state feature.
            observation = c_get_state()
            for feature in range(features):
                signal = observation[feature]
                for option in range(4):
                    scores[option] += state0l[option, feature] * signal

            # Arg-max over the scores. The running best is kept only when it
            # is strictly greater, so ties resolve to the higher index.
            action = 0
            for option in range(1, 4):
                if not (scores[action] > scores[option]):
                    action = option
            c_do_action(action)

            # NOTE(review): `observation` is read again after c_do_action;
            # whether the buffer still holds the pre-action state depends on
            # the interface module; confirm.
            belief = belief_free + belief_belief0l * belief
            for feature in range(features):
                belief += belief_state0l[feature] * observation[feature]

        self.belief = belief
        self.last_action = action