A Beginner's Q-Learning Demo in Python

2020-04-17 01:42:37 蜻蜓队长

Straight to the code; the next post will go back over the theory.

import numpy as np
# by Jarek.Tan
ALPHA = 0.1      # learning rate
GAMMA = 0.9      # discount factor
EPSILON = 0.9    # probability of acting greedily (explore roughly 10% of the time)
EPOCH = 20       # number of training episodes
N_STATE = 6      # linear world with states 0..5; state 5 is the goal
ACTIONS = ["left", "right"]

np.random.seed(2)
Q_Table = np.zeros((N_STATE, len(ACTIONS)))


def get_reward(S, A):
    # reward 1 when the goal state 5 is reached (or about to be entered from state 4)
    if S == 5:
        R = 1
    elif S == 4 and A == 'right':  # S != 5
        R = 1
    else:
        R = 0
    return R


def take_action(S):
    # epsilon-greedy action selection; returns the next state, the action taken,
    # and the maximum Q-value of the next state
    if S == 0:
        return 1, 'right', np.max(Q_Table[1, :])  # leftmost state: only "right" is possible
    maxQ = np.max(Q_Table[S, :])
    if maxQ == 0 or np.random.uniform() > EPSILON:
        if maxQ == 0:
            print("maxQ is 0")
        else:
            print("Random 0.1 hit")
        action = np.random.choice(ACTIONS)  # explore
    else:
        idx = np.argmax(Q_Table[S, :])
        print("IDX:", idx)
        action = ACTIONS[idx]  # exploit
    if action == 'left':
        SN = S - 1
    else:
        SN = S + 1
    maxQ = np.max(Q_Table[SN, :])
    return SN, action, maxQ


def get_value(S, A):
    if A == 'left':
        return Q_Table[S, 0]
    else:
        return Q_Table[S, 1]


def set_value(S, A, V):
    if A == 'left':
        Q_Table[S, 0] = V
    else:
        Q_Table[S, 1] = V


def update_q(S, A, MQ):
    # Q-learning update: mix the old value with the reward plus the discounted
    # maximum Q-value of the next state
    value = (1 - ALPHA) * get_value(S, A) + ALPHA * (get_reward(S, A) + GAMMA * MQ)
    set_value(S, A, value)


for loop in range(EPOCH):
    print("Loop:", loop)
    S = 0
    step = 0
    steps = []
    while S != 5:
        SN, action, maxQ = take_action(S)
        update_q(S, action, maxQ)
        S = SN
        step += 1
        steps.append(action)
    print("State:", S, "TotalSteps:", step, "\nDetail:", steps)
    print("Q_Table:", Q_Table)
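For reference, the single statement inside update_q is the standard Q-learning update, where α is ALPHA, γ is GAMMA, R(S, A) is get_reward(S, A), and the last term is the maxQ value that take_action returns for the next state S':

$$Q(S, A) \leftarrow (1 - \alpha)\,Q(S, A) + \alpha\,\bigl[R(S, A) + \gamma \max_{a'} Q(S', a')\bigr]$$

This is algebraically the same as the more familiar form $Q(S, A) \leftarrow Q(S, A) + \alpha\,[R + \gamma \max_{a'} Q(S', a') - Q(S, A)]$.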
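As a small follow-up sketch (not part of the original post), the greedy policy learned in Q_Table can be printed once the training loop finishes; this assumes the script above has already run, so np, N_STATE, ACTIONS, and Q_Table are in scope:

# Hypothetical follow-up, not in the original script: print the greedy policy
# read from the learned Q_Table. Run this after the training loop above.
for s in range(N_STATE - 1):  # state 5 is terminal, so it needs no action
    best_action = ACTIONS[int(np.argmax(Q_Table[s, :]))]
    print("State", s, "-> greedy action:", best_action)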

 
