1、项目介绍
该项目展示了如何将强化学习应用于游戏:通过 Q-learning 和马尔可夫决策过程(MDP)训练智能体在游戏中做出正确的动作。游戏的目标是通过避开障碍物来减少惩罚,同时通过收集金币来增加分数。
1.1 项目简介
这是一个基于 Pygame 的强化学习项目,目的是展示 Q-learning 在一个简单游戏中的应用。玩家可以手动操作,也可以观看智能体如何通过自我训练来学习最佳策略。
1.2 实现功能
- 手动游戏:玩家可以通过键盘控制角色移动,避开障碍物并收集金币。
- 智能体自我训练:通过 Q-learning 算法,智能体会逐步学习最佳策略,以最大化得分。
- 碰撞检测:检测角色与障碍物或金币的碰撞,以决定得分变化。
- 状态转换和奖励:定义状态转换和奖励机制,使得 Q-learning 能够有效地训练智能体。
2、部分截图
3、部分代码
import os
import pygame as pg
import random
import numpy as np
# Window and object dimensions, in pixels.
window_height = 400
window_width = 400
# pygame.display.set_mode expects (width, height). The two values are equal
# today, but using the documented order keeps the window from being
# transposed if they ever diverge.
window_dim = (window_width, window_height)
bg_color = (255, 255, 255)  # RGB triple used to clear the frame
# NOTE(review): object_height/object_width are not referenced in the visible
# code; presumably the size of one grid cell (pos_x/pos_y step by 100) - confirm.
object_height = 100
object_width = 100
# Initialise pygame and create the module-level display objects.
pg.init()
# Display surface that every draw call in this file blits onto.
disp = pg.display.set_mode(window_dim)
pg.display.set_caption('Q-Learning Game')
# Clock used by the game loop to cap the frame rate.
clk = pg.time.Clock()
# Load the image resources. The "resources" path is relative, so it is
# resolved against the current working directory at import time.
# neutral_ball: image drawn for the controlled ball in start_game().
neutral_ball = pg.image.load(os.path.join("resources", "ball1.jpg"))
# negative_ball: image drawn for the moving ball when ball_type == 0.
negative_ball = pg.image.load(os.path.join("resources", "ball2.jpg"))
# positive_ball: image drawn for the moving ball for any other ball_type.
positive_ball = pg.image.load(os.path.join("resources", "ball3.jpg"))
# Pixel offsets looked up by grid index (0-3) when drawing.
pos_x = list(range(0, 400, 100))
pos_y = list(range(0, 400, 100))
# Drawing helper
def draw(obj, disp, x_pos, y_pos):
    """Blit ``obj`` onto ``disp`` at the position ``(x_pos, y_pos)``.

    Returns nothing; the only effect is the call to ``disp.blit``.
    """
    destination = (x_pos, y_pos)
    disp.blit(obj, destination)
# Collision test
def collision(x1, y1, x2, y2):
    """Return whether the points ``(x1, y1)`` and ``(x2, y2)`` coincide."""
    same_column = x1 == x2
    same_row = y1 == y2
    return same_column and same_row
# Event handling
def handle_event(event, pg, x, y):
    """Apply a left/right key press to ``x`` and return the ``(x, y)`` pair.

    Only KEYDOWN events are considered. K_LEFT lowers ``x`` by one but not
    below 0; K_RIGHT raises it by one but not above 3. ``y`` is returned
    unchanged, and any other event leaves both values as they were.
    """
    if event.type != pg.KEYDOWN:
        return (x, y)
    pressed = event.key
    if pressed == pg.K_LEFT:
        x = max(x - 1, 0)
    if pressed == pg.K_RIGHT:
        x = min(x + 1, 3)
    return (x, y)
# Core of the learning algorithm
def learn(iterations=10, gamma=0.8):
    """Build the action table used by the game loop.

    Every state from ``generate_states()`` is swept ``iterations`` times. For
    each candidate action the backup
    ``reward(state) + gamma * sum(P(s' | s, a) * V(s'))`` is computed from the
    known transition model, so this is a model-based sweep rather than
    sample-based learning. Values are updated in place, so later states in a
    sweep already see the values written earlier in the same sweep.

    Args:
        iterations: Number of full sweeps over the state space (default 10,
            the value that used to be hard-coded).
        gamma: Discount factor applied to the expected next-state value
            (default 0.8, the value that used to be hard-coded).

    Returns:
        A ``(2, 4, 4, 4)`` float array holding, per state, the last action
        (0, -1 or 1) that raised that state's value.
    """
    state_value = np.zeros((2, 4, 4, 4))
    # NOTE(review): entries that are never improved keep the initial 1.0,
    # i.e. the action "+1" - confirm that this default is intended.
    action_value = np.ones((2, 4, 4, 4))
    states = generate_states()
    actions = [0, -1, 1]
    for _ in range(iterations):
        for state in states:
            for action in actions:
                possible_states = state_transition(state, action)
                expected_value = sum(
                    transition_probability(state, action, new_state)
                    * get_state_value(new_state, state_value)
                    for new_state in possible_states
                )
                q_value = reward(state) + gamma * expected_value
                # NOTE(review): values start at 0 and are only ever raised, so
                # a state whose best backup is negative stays at 0 - confirm.
                if get_state_value(state, state_value) < q_value:
                    set_state_value(state, state_value, q_value)
                    set_action_value(state, action_value, action)
    return action_value
def get_state_value(state, state_value):
    """Look up the stored value for ``state`` in the 4-level ``state_value`` table.

    ``state[0] == -1`` is mapped to slot 0 of the first axis; any other
    ``state[0]`` is used as the index as-is. ``state[1]``..``state[3]`` index
    the remaining three axes directly.
    """
    type_index = 0 if state[0] == -1 else state[0]
    ball_x, ball_y, agent_x = state[1], state[2], state[3]
    return state_value[type_index][ball_x][ball_y][agent_x]
def set_state_value(state, state_value, new_value):
    """Store ``new_value`` for ``state`` in the 4-level ``state_value`` table, in place.

    ``state[0] == -1`` is mapped to slot 0 of the first axis; any other
    ``state[0]`` is used as the index as-is.
    """
    type_index = 0 if state[0] == -1 else state[0]
    ball_x, ball_y, agent_x = state[1], state[2], state[3]
    state_value[type_index][ball_x][ball_y][agent_x] = new_value
def set_action_value(state, action_value, action):
    """Record ``action`` for ``state`` in the 4-level ``action_value`` table, in place.

    ``state[0] == -1`` is mapped to slot 0 of the first axis; any other
    ``state[0]`` is used as the index as-is.
    """
    type_index = 0 if state[0] == -1 else state[0]
    ball_x, ball_y, agent_x = state[1], state[2], state[3]
    action_value[type_index][ball_x][ball_y][agent_x] = action
def generate_states():
    """Enumerate every state as a 4-tuple, in nested-loop order.

    The first field takes the values -1 and 1; the other three each range
    over 0..3, giving 2 * 4 * 4 * 4 = 128 tuples with the last field varying
    fastest.
    """
    return [
        (kind, first, second, third)
        for kind in (-1, 1)
        for first in range(4)
        for second in range(4)
        for third in range(4)
    ]
def state_transition(state, action):
    """List the states reachable from ``state`` when ``action`` is taken.

    ``state`` is read as ``(ball_type, ball_x, ball_y, agent_x)``. The agent's
    column always moves by ``action`` and is clamped to 0..3. If the ball is
    not yet on row 3 it simply advances one row, giving a single successor.
    If it is on row 3, the next ball starts on row 0 and may have any of the
    four columns and either type (-1 or 1), giving eight successors ordered
    by column first, then type.
    """
    ball_type, ball_x, ball_y = state[0], state[1], state[2]
    agent_x = clamp(state[3] + action, 0, 3)
    if ball_y != 3:
        return [(ball_type, ball_x, ball_y + 1, agent_x)]
    return [
        (next_type, next_x, 0, agent_x)
        for next_x in range(4)
        for next_type in (-1, 1)
    ]
def reward(state):
    """Return the reward for being in ``state``.

    When ``state[2]`` is 3 and ``state[1]`` equals ``state[3]``, the reward is
    ``state[0]`` itself; every other state is worth 0.
    """
    on_last_row = state[2] == 3
    return state[0] if on_last_row and state[1] == state[3] else 0
def transition_probability(current_state, action, next_state):
    """Return the probability of reaching ``next_state`` from ``current_state``.

    The successors given by ``state_transition(current_state, action)`` are
    treated as equally likely; a ``next_state`` outside that list gets 0.
    """
    successors = state_transition(current_state, action)
    if next_state not in successors:
        return 0
    return 1.0 / len(successors)
def clamp(number, lower_bound, upper_bound):
    """Limit ``number`` to the range ``lower_bound``..``upper_bound``.

    The upper bound is applied first, then the lower bound.
    """
    capped = min(upper_bound, number)
    return max(lower_bound, capped)
# Main game loop
def start_game():
    """Compute the action table with ``learn()`` and run the game loop.

    The loop runs until a QUIT event is received, then the function returns.
    It does not shut pygame down itself.
    """
    abort = False
    # (x, y): grid cell of the moving ball.
    x = 0
    y = 0
    # (b_x, b_y): grid cell of the controlled ball, drawn with neutral_ball.
    b_x = 0
    b_y = 3
    # 0 selects the negative image and -1 point; 1 selects the positive image and +1 point.
    ball_type = 0
    points = 0
    # The table is computed once, before the first frame is shown.
    action_output = learn()
    while not abort:
        for event in pg.event.get():
            if event.type == pg.QUIT:
                abort = True
            # Key presses move the controlled ball in addition to the learned action below.
            (b_x, b_y) = handle_event(event, pg, b_x, b_y)
        disp.fill(bg_color)
        draw(neutral_ball, disp, pos_x[b_x], pos_y[b_y])
        if collision(b_x, b_y, x, y):
            # Maps ball_type 0 -> -1 and 1 -> +1.
            points += ball_type * 2 - 1
            print(points)
        draw(negative_ball if ball_type == 0 else positive_ball, disp, pos_x[x], pos_y[y])
        # Apply the learned action for the current state; the table holds floats, hence int().
        # The controlled ball is always put back on row 3.
        (b_x, b_y) = (clamp(b_x + int(action_output[ball_type][x][y][b_x]), 0, 3), 3)
        # Advance the moving ball one row, wrapping from row 3 back to row 0.
        y = (y + 1) % 4
        if y == 0:
            # A wrap starts a new ball with a random column and type.
            x = random.randrange(0, 4)
            ball_type = random.randrange(0, 2)
        pg.display.update()
        # Cap the loop at 6 iterations per second.
        clk.tick(6)
def end_game():
    """Shut pygame down and terminate the program.

    Raises:
        SystemExit: always, after ``pg.quit()`` has run.
    """
    pg.quit()
    # ``quit()`` is a helper injected by the ``site`` module for interactive
    # sessions and is absent under ``python -S`` or in frozen builds. Raising
    # SystemExit directly ends the program the same way without relying on it.
    raise SystemExit
# Start the game
if __name__ == "__main__":
    start_game()
    # start_game() returns once the window is closed; shut pygame down
    # explicitly. This is deliberately not placed in a ``finally`` block:
    # end_game() exits the process and would hide an error raised by
    # start_game().
    end_game()