-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathexampleMDP.py
109 lines (90 loc) · 1.79 KB
/
exampleMDP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
"""
This code belongs to the Probabilistic Graphical Models Python Library (PGM_PyLib)
PGM_PyLib: https://github.com/jona2510/PGM_PyLib
Check the "PGM_PyLib Manual vX.X.pdf" to see how the code works.
The PGM_PyLib is distributed under the GNU public license v3.0.
Code author: Jonathan Serrano-Pérez
"""
import numpy as np
from PGM_PyLib.MDP import MDP
R = np.array([
{1:-1, 2:-1}, #0
{2:-1, 3:-1},
{1:-1, 2:100, 3:-1}, #2
{1:-100, 3:-1},
{0:-1, 1:-1}, #4
{0:-1, 1:-1, 2:-100},
{0:100, 1:-1, 3:-1}, #6
{0:-1, 2:-1},
{2:-1, 3:-1}, #8
{0:-1, 2:-1, 3:-1},
{0:-100, 3:-1}, #10
])
# Transition table: FI[s][t][a] is the probability of ending up in state t
# when action a is taken in state s.  Only reachable successors and the
# actions available in s are listed; for each (s, a) pair the listed
# probabilities add up to 1.
# Action indices: 0 = up, 1 = down, 2 = right, 3 = left.
FI = [
    # state 0
    {
        0: {1: 0.1, 2: 0.1},
        1: {1: 0.1, 2: 0.8},
        4: {1: 0.8, 2: 0.1},
    },
    # state 1
    {
        0: {2: 0.1, 3: 0.8},
        1: {2: 0.1, 3: 0.1},
        2: {2: 0.8, 3: 0.1},
    },
    # state 2
    {
        1: {1: 0.1, 2: 0.1, 3: 0.7},
        2: {1: 0.1, 2: 0.1, 3: 0.1},
        3: {1: 0.1, 2: 0.7, 3: 0.1},
        5: {1: 0.7, 2: 0.1, 3: 0.1},
    },
    # state 3
    {
        2: {1: 0.1, 3: 0.8},
        3: {1: 0.1, 3: 0.1},
        6: {1: 0.8, 3: 0.1},
    },
    # state 4
    {
        0: {0: 0.8, 1: 0.1},
        4: {0: 0.1, 1: 0.1},
        7: {0: 0.1, 1: 0.8},
    },
    # state 5
    {
        2: {0: 0.7, 1: 0.1, 2: 0.1},
        5: {0: 0.1, 1: 0.1, 2: 0.1},
        6: {0: 0.1, 1: 0.1, 2: 0.7},
        9: {0: 0.1, 1: 0.7, 2: 0.1},
    },
    # state 6
    {
        3: {0: 0.7, 1: 0.1, 3: 0.1},
        5: {0: 0.1, 1: 0.1, 3: 0.7},
        6: {0: 0.1, 1: 0.1, 3: 0.1},
        10: {0: 0.1, 1: 0.7, 3: 0.1},
    },
    # state 7
    {
        4: {0: 0.8, 2: 0.1},
        7: {0: 0.1, 2: 0.1},
        8: {0: 0.1, 2: 0.8},
    },
    # state 8
    {
        7: {2: 0.1, 3: 0.8},
        8: {2: 0.1, 3: 0.1},
        9: {2: 0.8, 3: 0.1},
    },
    # state 9
    {
        5: {0: 0.7, 2: 0.1, 3: 0.1},
        8: {0: 0.1, 2: 0.1, 3: 0.7},
        9: {0: 0.1, 2: 0.1, 3: 0.1},
        10: {0: 0.1, 2: 0.7, 3: 0.1},
    },
    # state 10
    {
        6: {0: 0.8, 3: 0.1},
        9: {0: 0.1, 3: 0.8},
        10: {0: 0.1, 3: 0.1},
    },
]
# Assemble the MDP from the reward table R and the transition table FI,
# using a discount factor of 0.9.
mdp = MDP(
    reward=R,
    stateTransition=FI,
    discountFactor=0.9,
)

# Solve with value iteration and show the resulting policy.
# NOTE(review): 0.1 is presumably the convergence threshold -- confirm
# against the PGM_PyLib manual.  The method names are spelled exactly as
# this script calls them; check the library before "correcting" them.
print("value iteration:")
policy = mdp.valueItetration(0.1)
print("policy:\n", policy)

# Solve the same MDP again with policy iteration; `policy` is overwritten.
print("\n policy iteration:")
policy = mdp.policyItetration()
print("policy:\n", policy)