Python：使用 Torch 构建一个五子棋游戏的 AI 玩家

在这篇博客中，我们将展示如何使用 PyGame 来构建一个简单的五子棋（Gomoku）游戏，并为 AI（白棋）实现基本的决策能力。这个 AI 玩家使用了 PyTorch 构建的神经网络模型来预测最优的下棋位置。注意：这里的代码只是一个示范，具体的 AI 性能和训练效果还需要根据实际数据进行优化和改进。

1. 游戏初始化

首先，我们需要初始化一个简单的五子棋游戏，包括棋盘、玩家、以及棋盘状态的管理。棋盘大小默认为 15x15 格子。每个格子可以为空（0），也可以是黑棋（1）或白棋（2）。

import pygame
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Board geometry
BOARD_SIZE = 15   # cells per side
SQUARE_SIZE = 40  # edge length of one drawn cell, in pixels
MARGIN = 5        # padding between the window edge and the grid, in pixels
_WINDOW_EDGE = BOARD_SIZE * SQUARE_SIZE + 2 * MARGIN
WINDOW_SIZE = (_WINDOW_EDGE, _WINDOW_EDGE)  # square window: grid plus a margin on each side

# RGB colours
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
EMPTY = (200, 200, 200)     # fill colour of an unoccupied cell
SELECTED = (100, 100, 100)  # not referenced by the visible code

# Board / rules class
class GobangGame:
    """Board state and rules for a two-player Gomoku (five-in-a-row) game.

    ``board`` is a ``size`` x ``size`` integer array holding 0 for an empty
    cell, 1 for black and 2 for white. ``current_player`` is the colour that
    moves next (1 = black, 2 = white).
    """

    # Unit steps for the four line orientations; each one is scanned both ways.
    _DIRECTIONS = ((1, 0), (0, 1), (1, 1), (1, -1))

    def __init__(self, size=BOARD_SIZE):
        """Create an empty ``size`` x ``size`` board with black to move."""
        self.size = size
        self.board = np.zeros((size, size), dtype=int)
        self.current_player = 1

    def reset(self):
        """Clear the board and give the first move back to black."""
        # fill() mutates the existing array, so references to it stay valid.
        self.board.fill(0)
        self.current_player = 1

    def _on_board(self, x, y):
        """Return True if (x, y) lies inside the board."""
        return 0 <= x < self.size and 0 <= y < self.size

    def play(self, x, y):
        """Place the current player's stone at (x, y).

        Returns:
            True only if the move was accepted and wins the game. False if
            the move was rejected (off the board or cell occupied) or was
            accepted without winning. After a non-winning move the turn
            passes to the other player; after a winning move
            ``current_player`` still names the winner.
        """
        # Reject off-board coordinates explicitly: a negative index would
        # otherwise wrap around and land on the opposite edge of the array.
        if not self._on_board(x, y) or self.board[x, y] != 0:
            return False
        self.board[x, y] = self.current_player
        if self.check_win(x, y):
            return True
        self.current_player = 3 - self.current_player  # 1 <-> 2
        return False

    def _run_length(self, x, y, dx, dy):
        """Count consecutive stones equal to the one at (x, y), stepping by (dx, dy)."""
        stone = self.board[x, y]
        length = 0
        i, j = x + dx, y + dy
        while self._on_board(i, j) and self.board[i, j] == stone:
            length += 1
            i, j = i + dx, j + dy
        return length

    def check_win(self, x, y):
        """Return True if the stone at (x, y) belongs to a line of five or more."""
        # An empty cell can never be part of a winning line; without this
        # guard a run of empty cells would be counted as a win.
        if self.board[x, y] == 0:
            return False
        for dx, dy in self._DIRECTIONS:
            run = 1 + self._run_length(x, y, dx, dy) + self._run_length(x, y, -dx, -dy)
            if run >= 5:
                return True
        return False

2. 神经网络模型

接下来，我们使用 PyTorch 构建一个简单的全连接神经网络模型。该模型接受展平后的棋盘状态（15x15 的矩阵展平为长度为 225 的向量），并输出每个位置的得分。AI 通过这个得分选择最佳的下棋位置。

# Neural network model
class GobangNN(nn.Module):
    """Small fully connected network that scores every board cell.

    Input: a float tensor whose last dimension has ``board_size ** 2``
    entries (the flattened board); a leading batch dimension is optional.
    Output: a tensor of the same shape holding one raw, unnormalised score
    (logit) per cell.

    ``forward`` deliberately returns logits: ``nn.CrossEntropyLoss`` applies
    log-softmax itself, so normalising here would apply softmax twice during
    training. The arg-max over cells is the same for logits and
    probabilities. Use ``self.softmax(logits)`` when probabilities are needed.

    Args:
        board_size: cells per board side; defaults to the module-level
            ``BOARD_SIZE`` when omitted.
    """

    def __init__(self, board_size=None):
        super().__init__()
        if board_size is None:
            board_size = BOARD_SIZE
        cells = board_size * board_size
        self.fc1 = nn.Linear(cells, 128)  # flattened board in
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, cells)   # one score per cell out
        # dim=-1 normalises across cells for both 1-D and batched input.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return the raw per-cell scores for the flattened board ``x``."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

3. AI 玩家

AI 玩家将使用神经网络来决定下棋的位置。为了简化，这里我们用随机生成的标签进行训练；在实际应用中，你需要使用真实的对战数据进行训练。

# AI player (white)
class GobangAI:
    """Chooses moves by scoring the board with a neural network.

    Args:
        board: square integer array with 0 marking empty cells. The array is
            stored by reference, so later in-place changes are seen here.
        model: ``nn.Module`` mapping the flattened float32 board to one score
            per cell.
    """

    def __init__(self, board, model):
        self.board = board
        self.size = board.shape[0]
        self.model = model
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        # NOTE: CrossEntropyLoss applies log-softmax internally, so it
        # expects the model to emit raw scores rather than probabilities.
        self.loss_fn = nn.CrossEntropyLoss()

    def best_move(self):
        """Return the highest-scoring *empty* cell as ``(row, col)``.

        Occupied cells are excluded before the arg-max. Without that the
        model could keep proposing a cell that is already taken and the game
        would never advance. Returns ``None`` when the board has no empty
        cell left.
        """
        flat = self.board.flatten()
        empty = torch.from_numpy(flat == 0)
        if not bool(empty.any()):
            return None
        input_tensor = torch.from_numpy(flat.astype(np.float32))
        with torch.no_grad():
            scores = self.model(input_tensor)
        # -inf can never win the arg-max while at least one cell is empty.
        scores = scores.reshape(-1).masked_fill(~empty, float("-inf"))
        move = torch.argmax(scores).item()
        return divmod(move, self.size)

    def train(self, training_data, labels, epochs=1000):
        """Fit the model with one optimiser step per sample per epoch.

        Args:
            training_data: sequence of board arrays.
            labels: sequence of target cell indices, ``labels[i]`` belonging
                to ``training_data[i]``.
            epochs: number of passes over the data.

        Does nothing when ``training_data`` is empty.
        """
        if len(training_data) == 0:
            return

        # Convert once up front instead of rebuilding tensors every epoch.
        samples = [
            (
                torch.tensor(board.flatten().astype(np.float32)),
                torch.tensor([labels[i]]),
            )
            for i, board in enumerate(training_data)
        ]

        for epoch in range(epochs):
            for input_tensor, target in samples:
                output = self.model(input_tensor)
                # The loss expects a batch dimension: (1, cells) vs (1,).
                loss = self.loss_fn(output.unsqueeze(0), target)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            if epoch % 100 == 0:
                # Reports the loss of the last sample seen in this epoch.
                print(f"Epoch {epoch}, Loss: {loss.item()}")

4. 训练 AI

在这里，我们生成一些示例训练数据：棋盘样本是当前（空）棋盘的 1000 份拷贝，标签则是随机生成的落子位置。注意：这段代码用到了 game 和 ai 两个对象，需要在它们创建之后再执行（创建方式见第 5 节及文末的整体代码）。实际应用中，你应该根据实际对战生成高质量的训练数据。

# Training data (a real setup needs far richer and more varied samples).
# This is only a minimal placeholder; collect positions from actual games instead.
sample_count = 1000
cell_count = BOARD_SIZE * BOARD_SIZE
training_data = [game.board.copy() for _ in range(sample_count)]  # snapshots of the current board
labels = [np.random.randint(0, cell_count) for _ in range(sample_count)]  # one random cell index per sample

# Train the AI
ai.train(training_data, labels, epochs=1000)

5. 游戏主循环

最后，我们使用 PyGame 创建一个窗口，并在窗口中显示棋盘。玩家（黑棋）可以点击棋盘来下棋，而 AI（白棋）将在每一轮做出最佳决策。

# Initialise Pygame
pygame.init()

# Create the game window
screen = pygame.display.set_mode(WINDOW_SIZE)
pygame.display.set_caption("Gomoku Game")

# Load the font (the loop below does not draw any text with it)
font = pygame.font.SysFont("Arial", 30)

# Frame limiter, so the loop does not busy-spin a CPU core
clock = pygame.time.Clock()

# Create the game object
game = GobangGame()

# Create the AI (white)
model = GobangNN()
ai = GobangAI(game.board, model)

# Main game loop
running = True
while running:
    screen.fill(WHITE)

    # Draw the board: one filled square per cell plus a 1-pixel outline
    for row in range(BOARD_SIZE):
        for col in range(BOARD_SIZE):
            stone = game.board[row, col]
            if stone == 1:  # black
                color = BLACK
            elif stone == 2:  # white
                color = WHITE
            else:
                color = EMPTY
            cell = (col * SQUARE_SIZE + MARGIN, row * SQUARE_SIZE + MARGIN, SQUARE_SIZE, SQUARE_SIZE)
            pygame.draw.rect(screen, color, cell)
            pygame.draw.rect(screen, BLACK, cell, 1)

    # Handle window and mouse events
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        elif event.type == pygame.MOUSEBUTTONDOWN and game.current_player == 1:
            # Human (black) click: screen x selects the column, y the row
            x, y = event.pos
            row = (y - MARGIN) // SQUARE_SIZE
            col = (x - MARGIN) // SQUARE_SIZE
            if 0 <= row < BOARD_SIZE and 0 <= col < BOARD_SIZE:
                if game.play(row, col):
                    print(f"Player {game.current_player} wins! Game Over!")
                    game.reset()

    empty_cells = np.argwhere(game.board == 0)
    if len(empty_cells) == 0:
        # Nobody can move any more; without this the game would hang.
        print("Board is full. Draw! Game Over!")
        game.reset()
    elif game.current_player == 2:
        # AI (white) move
        ai_move = ai.best_move()
        if ai_move is None or game.board[ai_move] != 0:
            # play() rejects an occupied cell without passing the turn, so an
            # illegal suggestion would stall the game; use a random empty cell.
            pick = empty_cells[np.random.randint(len(empty_cells))]
            ai_move = (int(pick[0]), int(pick[1]))
        x, y = ai_move
        if game.play(x, y):
            print(f"AI wins! Game Over!")
            game.reset()

    pygame.display.update()
    clock.tick(60)

pygame.quit()

6. 提示：替换数据集

请注意，这里的训练数据是随机生成的，实际上你应该根据真实的对战数据来训练 AI。你可以根据自己的需求，收集高质量的对局数据，以便提升 AI 的性能。模型训练的过程需要大量的数据和时间，而这个代码只是展示了一个简化的示范。

整体代码

import pygame
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Board geometry
BOARD_SIZE = 15   # cells per side
SQUARE_SIZE = 40  # edge length of one drawn cell, in pixels
MARGIN = 5        # padding between the window edge and the grid, in pixels
_WINDOW_EDGE = BOARD_SIZE * SQUARE_SIZE + 2 * MARGIN
WINDOW_SIZE = (_WINDOW_EDGE, _WINDOW_EDGE)  # square window: grid plus a margin on each side

# RGB colours
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
EMPTY = (200, 200, 200)     # fill colour of an unoccupied cell
SELECTED = (100, 100, 100)  # not referenced by the visible code

# Board / rules class
class GobangGame:
    """Board state and rules for a two-player Gomoku (five-in-a-row) game.

    ``board`` is a ``size`` x ``size`` integer array holding 0 for an empty
    cell, 1 for black and 2 for white. ``current_player`` is the colour that
    moves next (1 = black, 2 = white).
    """

    # Unit steps for the four line orientations; each one is scanned both ways.
    _DIRECTIONS = ((1, 0), (0, 1), (1, 1), (1, -1))

    def __init__(self, size=BOARD_SIZE):
        """Create an empty ``size`` x ``size`` board with black to move."""
        self.size = size
        self.board = np.zeros((size, size), dtype=int)
        self.current_player = 1

    def reset(self):
        """Clear the board and give the first move back to black."""
        # fill() mutates the existing array, so references to it stay valid.
        self.board.fill(0)
        self.current_player = 1

    def _on_board(self, x, y):
        """Return True if (x, y) lies inside the board."""
        return 0 <= x < self.size and 0 <= y < self.size

    def play(self, x, y):
        """Place the current player's stone at (x, y).

        Returns:
            True only if the move was accepted and wins the game. False if
            the move was rejected (off the board or cell occupied) or was
            accepted without winning. After a non-winning move the turn
            passes to the other player; after a winning move
            ``current_player`` still names the winner.
        """
        # Reject off-board coordinates explicitly: a negative index would
        # otherwise wrap around and land on the opposite edge of the array.
        if not self._on_board(x, y) or self.board[x, y] != 0:
            return False
        self.board[x, y] = self.current_player
        if self.check_win(x, y):
            return True
        self.current_player = 3 - self.current_player  # 1 <-> 2
        return False

    def _run_length(self, x, y, dx, dy):
        """Count consecutive stones equal to the one at (x, y), stepping by (dx, dy)."""
        stone = self.board[x, y]
        length = 0
        i, j = x + dx, y + dy
        while self._on_board(i, j) and self.board[i, j] == stone:
            length += 1
            i, j = i + dx, j + dy
        return length

    def check_win(self, x, y):
        """Return True if the stone at (x, y) belongs to a line of five or more."""
        # An empty cell can never be part of a winning line; without this
        # guard a run of empty cells would be counted as a win.
        if self.board[x, y] == 0:
            return False
        for dx, dy in self._DIRECTIONS:
            run = 1 + self._run_length(x, y, dx, dy) + self._run_length(x, y, -dx, -dy)
            if run >= 5:
                return True
        return False

# Neural network model
class GobangNN(nn.Module):
    """Small fully connected network that scores every board cell.

    Input: a float tensor whose last dimension has ``board_size ** 2``
    entries (the flattened board); a leading batch dimension is optional.
    Output: a tensor of the same shape holding one raw, unnormalised score
    (logit) per cell.

    ``forward`` deliberately returns logits: ``nn.CrossEntropyLoss`` applies
    log-softmax itself, so normalising here would apply softmax twice during
    training. The arg-max over cells is the same for logits and
    probabilities. Use ``self.softmax(logits)`` when probabilities are needed.

    Args:
        board_size: cells per board side; defaults to the module-level
            ``BOARD_SIZE`` when omitted.
    """

    def __init__(self, board_size=None):
        super().__init__()
        if board_size is None:
            board_size = BOARD_SIZE
        cells = board_size * board_size
        self.fc1 = nn.Linear(cells, 128)  # flattened board in
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, cells)   # one score per cell out
        # dim=-1 normalises across cells for both 1-D and batched input.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return the raw per-cell scores for the flattened board ``x``."""
        hidden = torch.relu(self.fc1(x))
        hidden = torch.relu(self.fc2(hidden))
        return self.fc3(hidden)

# AI player (white)
class GobangAI:
    """Chooses moves by scoring the board with a neural network.

    Args:
        board: square integer array with 0 marking empty cells. The array is
            stored by reference, so later in-place changes are seen here.
        model: ``nn.Module`` mapping the flattened float32 board to one score
            per cell.
    """

    def __init__(self, board, model):
        self.board = board
        self.size = board.shape[0]
        self.model = model
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        # NOTE: CrossEntropyLoss applies log-softmax internally, so it
        # expects the model to emit raw scores rather than probabilities.
        self.loss_fn = nn.CrossEntropyLoss()

    def best_move(self):
        """Return the highest-scoring *empty* cell as ``(row, col)``.

        Occupied cells are excluded before the arg-max. Without that the
        model could keep proposing a cell that is already taken and the game
        would never advance. Returns ``None`` when the board has no empty
        cell left.
        """
        flat = self.board.flatten()
        empty = torch.from_numpy(flat == 0)
        if not bool(empty.any()):
            return None
        input_tensor = torch.from_numpy(flat.astype(np.float32))
        with torch.no_grad():
            scores = self.model(input_tensor)
        # -inf can never win the arg-max while at least one cell is empty.
        scores = scores.reshape(-1).masked_fill(~empty, float("-inf"))
        move = torch.argmax(scores).item()
        return divmod(move, self.size)

    def train(self, training_data, labels, epochs=1000):
        """Fit the model with one optimiser step per sample per epoch.

        Args:
            training_data: sequence of board arrays.
            labels: sequence of target cell indices, ``labels[i]`` belonging
                to ``training_data[i]``.
            epochs: number of passes over the data.

        Does nothing when ``training_data`` is empty.
        """
        if len(training_data) == 0:
            return

        # Convert once up front instead of rebuilding tensors every epoch.
        samples = [
            (
                torch.tensor(board.flatten().astype(np.float32)),
                torch.tensor([labels[i]]),
            )
            for i, board in enumerate(training_data)
        ]

        for epoch in range(epochs):
            for input_tensor, target in samples:
                output = self.model(input_tensor)
                # The loss expects a batch dimension: (1, cells) vs (1,).
                loss = self.loss_fn(output.unsqueeze(0), target)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

            if epoch % 100 == 0:
                # Reports the loss of the last sample seen in this epoch.
                print(f"Epoch {epoch}, Loss: {loss.item()}")

# Create the game object
game = GobangGame()

# Create the AI (white)
model = GobangNN()
ai = GobangAI(game.board, model)

# Training data (a real setup needs far richer and more varied samples).
# This is only a minimal placeholder; collect positions from actual games instead.
training_data = [game.board.copy() for _ in range(1000)]  # 1000 snapshots of the current board
labels = [np.random.randint(0, BOARD_SIZE * BOARD_SIZE) for _ in range(1000)]  # one random cell index per sample

# Train the AI before the window exists: training blocks this thread, and a
# window whose event queue is not being pumped appears frozen.
ai.train(training_data, labels, epochs=1000)

# Initialise Pygame
pygame.init()

# Create the game window
screen = pygame.display.set_mode(WINDOW_SIZE)
pygame.display.set_caption("Gomoku Game")

# Load the font (the loop below does not draw any text with it)
font = pygame.font.SysFont("Arial", 30)

# Frame limiter, so the loop does not busy-spin a CPU core
clock = pygame.time.Clock()

# Main game loop
running = True
while running:
    screen.fill(WHITE)

    # Draw the board: one filled square per cell plus a 1-pixel outline
    for row in range(BOARD_SIZE):
        for col in range(BOARD_SIZE):
            stone = game.board[row, col]
            if stone == 1:  # black
                color = BLACK
            elif stone == 2:  # white
                color = WHITE
            else:
                color = EMPTY
            cell = (col * SQUARE_SIZE + MARGIN, row * SQUARE_SIZE + MARGIN, SQUARE_SIZE, SQUARE_SIZE)
            pygame.draw.rect(screen, color, cell)
            # The outline uses BLACK; the undefined name BORDER_COLOR that
            # stood here raised NameError on the first frame.
            pygame.draw.rect(screen, BLACK, cell, 1)

    # Handle window and mouse events
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False
        elif event.type == pygame.MOUSEBUTTONDOWN and game.current_player == 1:
            # Human (black) click: screen x selects the column, y the row
            x, y = event.pos
            row = (y - MARGIN) // SQUARE_SIZE
            col = (x - MARGIN) // SQUARE_SIZE
            if 0 <= row < BOARD_SIZE and 0 <= col < BOARD_SIZE:
                if game.play(row, col):
                    print(f"Player {game.current_player} wins! Game Over!")
                    game.reset()

    empty_cells = np.argwhere(game.board == 0)
    if len(empty_cells) == 0:
        # Nobody can move any more; without this the game would hang.
        print("Board is full. Draw! Game Over!")
        game.reset()
    elif game.current_player == 2:
        # AI (white) move
        ai_move = ai.best_move()
        if ai_move is None or game.board[ai_move] != 0:
            # play() rejects an occupied cell without passing the turn, so an
            # illegal suggestion would stall the game; use a random empty cell.
            pick = empty_cells[np.random.randint(len(empty_cells))]
            ai_move = (int(pick[0]), int(pick[1]))
        x, y = ai_move
        if game.play(x, y):
            print(f"AI wins! Game Over!")
            game.reset()

    pygame.display.update()
    clock.tick(60)

pygame.quit()

总结

通过这个示例，我们展示了如何创建一个简单的五子棋游戏，并为 AI 实现了一个基本的决策机制。你可以进一步改进 AI 的决策能力，采用更复杂的神经网络模型或者使用强化学习来训练 AI，使其更接近人类的水平。