Python:使用 PyTorch 构建一个五子棋游戏的 AI 玩家

在这篇博客中,我们将展示如何使用 PyGame 来构建一个简单的五子棋(Gomoku)游戏,并为 AI(白棋)实现基本的决策能力。这个 AI 玩家使用了 PyTorch 构建的神经网络模型来预测最优的下棋位置。注意:这里的代码只是一个示范,具体的 AI 性能和训练效果还需要根据实际数据进行优化和改进。

1. 游戏初始化

首先,我们需要初始化一个简单的五子棋游戏,包括棋盘、玩家、以及棋盘状态的管理。棋盘大小默认为 15x15 格子。每个格子可以为空(0),也可以是黑棋(1)或白棋(2)。

import pygame
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Board geometry: BOARD_SIZE counts cells per side, the rest are pixel sizes.
BOARD_SIZE = 15
SQUARE_SIZE = 40
MARGIN = 5
# The window is square: the full grid plus one margin on each side.
WINDOW_SIZE = (BOARD_SIZE * SQUARE_SIZE + 2 * MARGIN,) * 2

# RGB colours used when drawing.
WHITE = (255,) * 3
BLACK = (0,) * 3
EMPTY = (200,) * 3  # fill colour of an unoccupied cell
SELECTED = (100,) * 3

# Board / game-state class
class GobangGame:
    """Board state and rules for a two-player Gomoku (five-in-a-row) game.

    Each cell of ``board`` holds 0 (empty), 1 (black) or 2 (white).
    ``current_player`` is the colour that moves next; black moves first.
    """

    # Line directions as (dx, dy) steps; each one is scanned both ways.
    _DIRECTIONS = ((1, 0), (0, 1), (1, 1), (1, -1))

    def __init__(self, size=BOARD_SIZE):
        """Create an empty ``size`` x ``size`` board with black to move."""
        self.size = size
        self.board = np.zeros((size, size), dtype=int)
        self.current_player = 1

    def reset(self):
        """Clear the board in place and give the move back to black.

        The array is cleared with ``fill`` rather than replaced, so any other
        object holding a reference to ``board`` sees the cleared board too.
        """
        self.board.fill(0)
        self.current_player = 1

    def _in_bounds(self, x, y):
        """Return True when (x, y) is a valid board coordinate."""
        return 0 <= x < self.size and 0 <= y < self.size

    def play(self, x, y):
        """Place the current player's stone at (x, y).

        Returns:
            True if the move wins the game. In that case ``current_player``
            is left unchanged, so it still names the winner.
            False if the move was rejected (off the board or cell occupied)
            or was legal but did not win; after a legal non-winning move the
            turn passes to the other player.
        """
        # Reject off-board coordinates explicitly: a negative index would
        # otherwise wrap around to the opposite edge of the numpy array.
        if not self._in_bounds(x, y):
            return False
        if self.board[x, y] != 0:
            return False
        self.board[x, y] = self.current_player
        if self.check_win(x, y):
            return True
        self.current_player = 3 - self.current_player  # 1 <-> 2
        return False

    def _run_length(self, x, y, dx, dy):
        """Count same-coloured stones adjacent to (x, y) along (dx, dy)."""
        stone = self.board[x, y]
        count = 0
        i, j = x + dx, y + dy
        while self._in_bounds(i, j) and self.board[i, j] == stone:
            count += 1
            i, j = i + dx, j + dy
        return count

    def check_win(self, x, y):
        """Return True if the stone at (x, y) is part of a line of 5 or more.

        An empty or off-board cell never counts as a win.
        """
        if not self._in_bounds(x, y) or self.board[x, y] == 0:
            return False
        return any(
            1 + self._run_length(x, y, dx, dy) + self._run_length(x, y, -dx, -dy) >= 5
            for dx, dy in self._DIRECTIONS
        )

2. 神经网络模型

接下来,我们使用 PyTorch 构建一个简单的神经网络模型。该模型接受一个棋盘的状态(一个 15x15 的矩阵,输入前展平为长度 225 的向量),并输出每个位置的得分。AI 通过这个得分选择最佳的下棋位置。

# Neural-network model
class GobangNN(nn.Module):
    """Small fully connected network mapping a flattened board to move probabilities.

    Input:  a float tensor whose last dimension has BOARD_SIZE * BOARD_SIZE
            entries (the flattened board), optionally with leading batch
            dimensions.
    Output: a tensor of the same shape holding a probability per cell; the
            values along the last dimension sum to 1.

    Because ``forward`` already applies softmax, a loss that expects raw
    logits (such as ``nn.CrossEntropyLoss``) should not be fed this output
    directly; use a negative log-likelihood on ``log(output)`` instead.
    """

    def __init__(self):
        super().__init__()
        n_cells = BOARD_SIZE * BOARD_SIZE
        self.fc1 = nn.Linear(n_cells, 128)  # one input per board cell
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, n_cells)  # one score per board cell
        # Normalise over the cell dimension (the last one). For a single 1-D
        # board this equals dim=0, and it stays correct for batched input.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return per-cell move probabilities for the flattened board(s) ``x``."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.softmax(self.fc3(hidden))

3. AI 玩家

AI 玩家将使用神经网络来决定下棋的位置。为了简化,这里我们用随机生成的训练数据进行训练;在实际应用中,你需要使用真实的对战数据进行训练。

# AI player (white)
class GobangAI:
    """Chooses moves with a neural network and trains that network.

    ``board`` is kept by reference, so the AI always sees the live game
    state as long as the caller mutates that same array in place.
    ``model`` is called with the flattened board as a float32 tensor and is
    expected to return one probability per cell (softmax already applied).
    """

    def __init__(self, board, model):
        self.board = board
        self.size = board.shape[0]
        self.model = model
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        # The model already outputs probabilities, so the loss is a negative
        # log-likelihood on log(prob). CrossEntropyLoss would apply softmax
        # a second time and flatten the gradients.
        self.loss_fn = nn.NLLLoss()

    def best_move(self):
        """Return the (row, col) of the highest-scoring empty cell.

        Occupied cells are masked out so the returned move is always legal.
        Returns None when the board has no empty cell left.
        """
        flat_board = self.board.flatten()
        is_empty = torch.from_numpy(flat_board == 0)
        if not bool(is_empty.any()):
            return None
        input_tensor = torch.tensor(flat_board.astype(np.float32))
        with torch.no_grad():
            output = self.model(input_tensor)
        # -inf can never win the argmax, so occupied cells are excluded.
        scores = output.reshape(-1).masked_fill(~is_empty, float("-inf"))
        move = int(torch.argmax(scores).item())
        return divmod(move, self.size)

    def train(self, training_data, labels, epochs=1000):
        """Fit the model with one optimiser step per (board, label) pair.

        Args:
            training_data: sequence of 2-D numpy boards.
            labels: sequence of flat cell indices (row * size + col), one
                per board, naming the move to learn.
            epochs: number of passes over the data.

        The last sample's loss is printed every 100 epochs. Nothing is
        printed when there is no training data.
        """
        for epoch in range(epochs):
            loss = None
            for sample, label in zip(training_data, labels):
                # Forward pass
                input_tensor = torch.tensor(sample.flatten().astype(np.float32))
                probs = self.model(input_tensor)
                # Clamp before log so a probability that underflowed to 0
                # does not produce -inf / NaN gradients.
                log_probs = torch.log(probs.clamp_min(1e-12)).unsqueeze(0)
                target = torch.tensor([int(label)], dtype=torch.long)

                # Compute loss and update the weights
                loss = self.loss_fn(log_probs, target)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            if loss is not None and epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss.item()}")

4. 训练 AI

在这里,我们生成一些随机的训练数据和标签。实际应用中,你应该根据实际对战生成高质量的训练数据。注意:这段代码用到了第 5 节中创建的对象(例如 ai),运行时需要放在这些对象创建之后(参见文末的整体代码)。

# Demo training data: random board positions, each labelled with a randomly
# chosen EMPTY cell (flat index row * BOARD_SIZE + col). The labels carry no
# real strategy; replace this with positions and moves from real games.
training_data = []
labels = []
for _ in range(1000):
    sample = np.random.randint(0, 3, size=(BOARD_SIZE, BOARD_SIZE))
    # Force at least one empty cell so a legal label always exists.
    sample[np.random.randint(BOARD_SIZE), np.random.randint(BOARD_SIZE)] = 0
    empty_cells = np.flatnonzero(sample == 0)
    training_data.append(sample)
    labels.append(int(np.random.choice(empty_cells)))

# Train the AI
ai.train(training_data, labels, epochs=1000)

5. 游戏主循环

最后,我们使用 PyGame 创建一个窗口,并在窗口中显示棋盘。玩家(黑棋)可以点击棋盘来下棋,而 AI(白棋)将在每一轮做出最佳决策。

# Initialise pygame (must happen before the display and font calls below)
pygame.init()

# Create the game window
screen = pygame.display.set_mode(WINDOW_SIZE)
pygame.display.set_caption("Gomoku Game")

# Load a font (not referenced by the rest of the visible code)
font = pygame.font.SysFont("Arial", 30)

# Create the game object
game = GobangGame()

# Create the AI (white); it receives the game's board array itself, not a copy
model = GobangNN()
ai = GobangAI(game.board, model)

# Main game loop: draw the board, handle the human's (black) clicks, then let
# the AI (white) answer.
clock = pygame.time.Clock()  # used to cap the frame rate
running = True
while running:
    screen.fill(WHITE)

    # Draw every cell as a filled square (stone colour, or EMPTY) with a
    # 1-pixel black outline.
    for row in range(BOARD_SIZE):
        for col in range(BOARD_SIZE):
            color = EMPTY
            if game.board[row, col] == 1:  # black stone
                color = BLACK
            elif game.board[row, col] == 2:  # white stone
                color = WHITE
            cell_rect = (col * SQUARE_SIZE + MARGIN, row * SQUARE_SIZE + MARGIN, SQUARE_SIZE, SQUARE_SIZE)
            pygame.draw.rect(screen, color, cell_rect)
            pygame.draw.rect(screen, BLACK, cell_rect, 1)

    # Handle window events
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        elif event.type == pygame.MOUSEBUTTONDOWN:
            if game.current_player == 1:  # black (human) clicked
                x, y = event.pos
                row = (y - MARGIN) // SQUARE_SIZE
                col = (x - MARGIN) // SQUARE_SIZE
                if 0 <= row < BOARD_SIZE and 0 <= col < BOARD_SIZE:
                    if game.play(row, col):
                        print(f"Player {game.current_player} wins! Game Over!")
                        game.reset()

    # AI (white) moves
    if game.current_player == 2:
        empty_cells = np.argwhere(game.board == 0)
        if len(empty_cells) == 0:
            # Board is full with no winner: without this the loop would wait
            # forever for a move that cannot be made.
            print("Draw! Game Over!")
            game.reset()
        else:
            ai_move = ai.best_move()
            # If the AI proposes nothing or an occupied cell, play() would
            # reject it and the turn would never change; fall back to a
            # random empty cell instead.
            if not ai_move or game.board[ai_move] != 0:
                fallback_row, fallback_col = empty_cells[np.random.randint(len(empty_cells))]
                ai_move = (int(fallback_row), int(fallback_col))
            x, y = ai_move
            if game.play(x, y):
                print(f"AI wins! Game Over!")
                game.reset()

    pygame.display.update()
    clock.tick(30)  # limit to 30 FPS instead of spinning a CPU core

pygame.quit()

6. 提示:替换数据集

请注意,这里的训练数据是随机生成的,实际上你应该根据真实的对战数据来训练 AI。你可以根据自己的需求,收集高质量的对局数据,以便提升 AI 的性能。模型训练的过程需要大量的数据和时间,而这个代码只是展示了一个简化的示范。

整体代码

import pygame
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Board geometry: BOARD_SIZE counts cells per side, the rest are pixel sizes.
BOARD_SIZE = 15
SQUARE_SIZE = 40
MARGIN = 5
# The window is square: the full grid plus one margin on each side.
WINDOW_SIZE = (BOARD_SIZE * SQUARE_SIZE + 2 * MARGIN,) * 2

# RGB colours used when drawing.
WHITE = (255,) * 3
BLACK = (0,) * 3
EMPTY = (200,) * 3  # fill colour of an unoccupied cell
SELECTED = (100,) * 3

# Board / game-state class
class GobangGame:
    """Board state and rules for a two-player Gomoku (five-in-a-row) game.

    Each cell of ``board`` holds 0 (empty), 1 (black) or 2 (white).
    ``current_player`` is the colour that moves next; black moves first.
    """

    # Line directions as (dx, dy) steps; each one is scanned both ways.
    _DIRECTIONS = ((1, 0), (0, 1), (1, 1), (1, -1))

    def __init__(self, size=BOARD_SIZE):
        """Create an empty ``size`` x ``size`` board with black to move."""
        self.size = size
        self.board = np.zeros((size, size), dtype=int)
        self.current_player = 1

    def reset(self):
        """Clear the board in place and give the move back to black.

        The array is cleared with ``fill`` rather than replaced, so any other
        object holding a reference to ``board`` sees the cleared board too.
        """
        self.board.fill(0)
        self.current_player = 1

    def _in_bounds(self, x, y):
        """Return True when (x, y) is a valid board coordinate."""
        return 0 <= x < self.size and 0 <= y < self.size

    def play(self, x, y):
        """Place the current player's stone at (x, y).

        Returns:
            True if the move wins the game. In that case ``current_player``
            is left unchanged, so it still names the winner.
            False if the move was rejected (off the board or cell occupied)
            or was legal but did not win; after a legal non-winning move the
            turn passes to the other player.
        """
        # Reject off-board coordinates explicitly: a negative index would
        # otherwise wrap around to the opposite edge of the numpy array.
        if not self._in_bounds(x, y):
            return False
        if self.board[x, y] != 0:
            return False
        self.board[x, y] = self.current_player
        if self.check_win(x, y):
            return True
        self.current_player = 3 - self.current_player  # 1 <-> 2
        return False

    def _run_length(self, x, y, dx, dy):
        """Count same-coloured stones adjacent to (x, y) along (dx, dy)."""
        stone = self.board[x, y]
        count = 0
        i, j = x + dx, y + dy
        while self._in_bounds(i, j) and self.board[i, j] == stone:
            count += 1
            i, j = i + dx, j + dy
        return count

    def check_win(self, x, y):
        """Return True if the stone at (x, y) is part of a line of 5 or more.

        An empty or off-board cell never counts as a win.
        """
        if not self._in_bounds(x, y) or self.board[x, y] == 0:
            return False
        return any(
            1 + self._run_length(x, y, dx, dy) + self._run_length(x, y, -dx, -dy) >= 5
            for dx, dy in self._DIRECTIONS
        )

# Neural-network model
class GobangNN(nn.Module):
    """Small fully connected network mapping a flattened board to move probabilities.

    Input:  a float tensor whose last dimension has BOARD_SIZE * BOARD_SIZE
            entries (the flattened board), optionally with leading batch
            dimensions.
    Output: a tensor of the same shape holding a probability per cell; the
            values along the last dimension sum to 1.

    Because ``forward`` already applies softmax, a loss that expects raw
    logits (such as ``nn.CrossEntropyLoss``) should not be fed this output
    directly; use a negative log-likelihood on ``log(output)`` instead.
    """

    def __init__(self):
        super().__init__()
        n_cells = BOARD_SIZE * BOARD_SIZE
        self.fc1 = nn.Linear(n_cells, 128)  # one input per board cell
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, n_cells)  # one score per board cell
        # Normalise over the cell dimension (the last one). For a single 1-D
        # board this equals dim=0, and it stays correct for batched input.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return per-cell move probabilities for the flattened board(s) ``x``."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.softmax(self.fc3(hidden))

# AI player (white)
class GobangAI:
    """Chooses moves with a neural network and trains that network.

    ``board`` is kept by reference, so the AI always sees the live game
    state as long as the caller mutates that same array in place.
    ``model`` is called with the flattened board as a float32 tensor and is
    expected to return one probability per cell (softmax already applied).
    """

    def __init__(self, board, model):
        self.board = board
        self.size = board.shape[0]
        self.model = model
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        # The model already outputs probabilities, so the loss is a negative
        # log-likelihood on log(prob). CrossEntropyLoss would apply softmax
        # a second time and flatten the gradients.
        self.loss_fn = nn.NLLLoss()

    def best_move(self):
        """Return the (row, col) of the highest-scoring empty cell.

        Occupied cells are masked out so the returned move is always legal.
        Returns None when the board has no empty cell left.
        """
        flat_board = self.board.flatten()
        is_empty = torch.from_numpy(flat_board == 0)
        if not bool(is_empty.any()):
            return None
        input_tensor = torch.tensor(flat_board.astype(np.float32))
        with torch.no_grad():
            output = self.model(input_tensor)
        # -inf can never win the argmax, so occupied cells are excluded.
        scores = output.reshape(-1).masked_fill(~is_empty, float("-inf"))
        move = int(torch.argmax(scores).item())
        return divmod(move, self.size)

    def train(self, training_data, labels, epochs=1000):
        """Fit the model with one optimiser step per (board, label) pair.

        Args:
            training_data: sequence of 2-D numpy boards.
            labels: sequence of flat cell indices (row * size + col), one
                per board, naming the move to learn.
            epochs: number of passes over the data.

        The last sample's loss is printed every 100 epochs. Nothing is
        printed when there is no training data.
        """
        for epoch in range(epochs):
            loss = None
            for sample, label in zip(training_data, labels):
                # Forward pass
                input_tensor = torch.tensor(sample.flatten().astype(np.float32))
                probs = self.model(input_tensor)
                # Clamp before log so a probability that underflowed to 0
                # does not produce -inf / NaN gradients.
                log_probs = torch.log(probs.clamp_min(1e-12)).unsqueeze(0)
                target = torch.tensor([int(label)], dtype=torch.long)

                # Compute loss and update the weights
                loss = self.loss_fn(log_probs, target)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            if loss is not None and epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss.item()}")

# Create the game object
game = GobangGame()

# Create the AI (white); it receives the game's board array itself, not a copy
model = GobangNN()
ai = GobangAI(game.board, model)

# Demo training data: random board positions, each labelled with a randomly
# chosen EMPTY cell (flat index row * BOARD_SIZE + col). The labels carry no
# real strategy; replace this with positions and moves from real games.
training_data = []
labels = []
for _ in range(1000):
    sample = np.random.randint(0, 3, size=(BOARD_SIZE, BOARD_SIZE))
    # Force at least one empty cell so a legal label always exists.
    sample[np.random.randint(BOARD_SIZE), np.random.randint(BOARD_SIZE)] = 0
    empty_cells = np.flatnonzero(sample == 0)
    training_data.append(sample)
    labels.append(int(np.random.choice(empty_cells)))

# Train the AI BEFORE opening the window: training blocks for a long time,
# and a window created earlier would sit unresponsive because no events are
# being processed.
ai.train(training_data, labels, epochs=1000)

# Initialise pygame (must happen before the display and font calls below)
pygame.init()

# Create the game window
screen = pygame.display.set_mode(WINDOW_SIZE)
pygame.display.set_caption("Gomoku Game")

# Load a font (not referenced by the rest of the visible code)
font = pygame.font.SysFont("Arial", 30)

# Main game loop: draw the board, handle the human's (black) clicks, then let
# the AI (white) answer.
clock = pygame.time.Clock()  # used to cap the frame rate
running = True
while running:
    screen.fill(WHITE)

    # Draw every cell as a filled square (stone colour, or EMPTY) with a
    # 1-pixel black outline. The outline uses BLACK: the name BORDER_COLOR
    # used here previously is not defined anywhere in this file.
    for row in range(BOARD_SIZE):
        for col in range(BOARD_SIZE):
            color = EMPTY
            if game.board[row, col] == 1:  # black stone
                color = BLACK
            elif game.board[row, col] == 2:  # white stone
                color = WHITE
            cell_rect = (col * SQUARE_SIZE + MARGIN, row * SQUARE_SIZE + MARGIN, SQUARE_SIZE, SQUARE_SIZE)
            pygame.draw.rect(screen, color, cell_rect)
            pygame.draw.rect(screen, BLACK, cell_rect, 1)

    # Handle window events
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        elif event.type == pygame.MOUSEBUTTONDOWN:
            if game.current_player == 1:  # black (human) clicked
                x, y = event.pos
                row = (y - MARGIN) // SQUARE_SIZE
                col = (x - MARGIN) // SQUARE_SIZE
                if 0 <= row < BOARD_SIZE and 0 <= col < BOARD_SIZE:
                    if game.play(row, col):
                        print(f"Player {game.current_player} wins! Game Over!")
                        game.reset()

    # AI (white) moves
    if game.current_player == 2:
        empty_cells = np.argwhere(game.board == 0)
        if len(empty_cells) == 0:
            # Board is full with no winner: without this the loop would wait
            # forever for a move that cannot be made.
            print("Draw! Game Over!")
            game.reset()
        else:
            ai_move = ai.best_move()
            # If the AI proposes nothing or an occupied cell, play() would
            # reject it and the turn would never change; fall back to a
            # random empty cell instead.
            if not ai_move or game.board[ai_move] != 0:
                fallback_row, fallback_col = empty_cells[np.random.randint(len(empty_cells))]
                ai_move = (int(fallback_row), int(fallback_col))
            x, y = ai_move
            if game.play(x, y):
                print(f"AI wins! Game Over!")
                game.reset()

    pygame.display.update()
    clock.tick(30)  # limit to 30 FPS instead of spinning a CPU core

pygame.quit()

总结

通过这个示例,我们展示了如何创建一个简单的五子棋游戏,并为 AI 实现了一个基本的决策机制。你可以进一步改进 AI 的决策能力,采用更复杂的神经网络模型或者使用强化学习来训练 AI,使其更接近人类的水平。