[Python crawler] Scrape the Shuangseqiu (Double Color Ball) lottery draw numbers and collect the complete data in 45 lines of code (updated 2023-06-28)

Link: https://pan.baidu.com/s/18oE308_NVNPaCOACw_H5Hw?pwd=abc1 

Use a crawler to scrape the Shuangseqiu (Double Color Ball) lottery draw numbers and obtain the complete data, implemented in 45 lines of code (updated 2023-06-28).

This is the data as it is published online. How do we crawl it?

The script below crawls only the 30 most recent Shuangseqiu draws and writes the results to a file named "双色球开奖结果.csv" ("Shuangseqiu lottery results.csv").

import requests
import os
from bs4 import BeautifulSoup


def download(url, page):
    """Fetch one draw page and append its numbers to the results CSV.

    Args:
        url: Address of the detail page for a single draw.
        page: Issue label of that draw; it is written to the CSV and echoed
            in the progress message.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # A timeout keeps one stalled connection from hanging the whole crawl,
    # and raise_for_status() stops an error page from being parsed as a draw.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Each <li> under div.ball_box01 contributes one entry to the row.
    ball = [li.string for li in soup.select('div.ball_box01 ul li')]
    write_to_excel(page, ball)
    print(f"第{page}期开奖结果录入完成")


def write_to_excel(page, ball):
    """Append one draw as a row of the results CSV.

    Despite the name, the output is a plain CSV file encoded as UTF-8 with a
    byte-order mark.

    Args:
        page: Issue label of the draw.
        ball: Sequence holding at least seven entries; the first seven are
            written after the issue label.

    Raises:
        IndexError: If ``ball`` has fewer than seven entries.
    """
    # Build the row before touching the file so a malformed draw fails
    # without opening the CSV at all.
    row = f'第{page}期,{ball[0]},{ball[1]},{ball[2]},{ball[3]},{ball[4]},{ball[5]},{ball[6]}\n'
    # The context manager closes the handle even if the write raises.
    with open('双色球开奖结果.csv', 'a', encoding='utf_8_sig') as f:
        f.write(row)


def turn_page(count=30):
    """Crawl the draws linked from the 500.com Shuangseqiu index page.

    Args:
        count: How many links from the issue selector to follow, taken from
            the start of the list. Defaults to 30, the number of recent
            draws the script originally collected.

    Raises:
        requests.RequestException: If the index page cannot be fetched or the
            server answers with an HTTP error status.
    """
    url = "http://kaijiang.500.com/ssq.shtml"
    # Fail fast on a stalled or failed request instead of parsing an empty
    # or error page and silently crawling nothing.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    # Every <a> inside div.iSelectList links to one draw; its text is the
    # issue label.
    page_links = soup.select("div.iSelectList a")

    for link in page_links[:count]:
        download(link['href'], link.string)


def main():
    """Remove any existing results CSV, then run the crawl."""
    csv_path = '双色球开奖结果.csv'
    # Rows are appended to this file, so a leftover copy from an earlier run
    # would otherwise end up with duplicated draws.
    if os.path.exists(csv_path):
        os.remove(csv_path)
    turn_page()


# Run the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

 

Open the generated CSV file to verify that the data is correct.

Very good. Next, let's write a model to predict the numbers, starting with a random forest regressor.

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

# Load the crawled draws. The crawler writes no header row, so header=None
# keeps the first draw as data instead of consuming it as column names.
data = pd.read_csv('双色球开奖结果.csv', header=None, encoding='utf_8_sig')

# Order the draws oldest -> newest by the issue label in column 0.
# NOTE(review): assumes issue labels sort chronologically as strings
# (same-width issue numbers) - confirm against the crawled file.
data = data.sort_values(by=0).reset_index(drop=True)

# Columns 1-6 hold the six red balls of each draw.
reds = data.iloc[:, 1:7].to_numpy()
if len(reds) < 2:
    raise SystemExit("Need at least two draws in the CSV to train a next-draw model.")

# Pair every draw with the one that follows it. Using the same rows as both
# features and labels would only teach the model to echo its input.
features = reds[:-1]  # red balls of draw t
labels = reds[1:]  # red balls of draw t + 1

# Random forest regressor with a fixed seed for repeatable output.
model = RandomForestRegressor(n_estimators=100, random_state=1)

# Fit on (draw t -> draw t + 1) pairs.
model.fit(features, labels)

# Predict the red balls that follow the most recent draw.
next_features = model.predict(reds[-1].reshape(1, -1))
next_features = np.round(next_features).astype(int)

# Print the predicted red-ball numbers.
print("预测的红色球号码:", next_features)

This prediction method seems a bit simple, so let's try a multi-layer perceptron (MLP) for prediction instead.

import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor

# Load the crawled draws. The crawler writes no header row, so header=None
# keeps the first draw as data instead of consuming it as column names.
data = pd.read_csv('双色球开奖结果.csv', header=None, encoding='utf_8_sig')

# Order the draws oldest -> newest by the issue label in column 0.
# NOTE(review): assumes issue labels sort chronologically as strings
# (same-width issue numbers) - confirm against the crawled file.
data = data.sort_values(by=0).reset_index(drop=True)

# Columns 1-6 hold the six red balls of each draw.
reds = data.iloc[:, 1:7].to_numpy()
if len(reds) < 2:
    raise SystemExit("Need at least two draws in the CSV to train a next-draw model.")

# Pair every draw with the one that follows it. Using the same rows as both
# features and labels would only teach the model to echo its input.
features = reds[:-1]  # red balls of draw t
labels = reds[1:]  # red balls of draw t + 1

# Multi-layer perceptron regressor with one hidden layer of 100 units.
model = MLPRegressor(hidden_layer_sizes=(100,), random_state=1)

# Fit on (draw t -> draw t + 1) pairs.
model.fit(features, labels)

# Predict the red balls that follow the most recent draw.
next_features = model.predict(reds[-1].reshape(1, -1))
next_features = np.round(next_features).astype(int)

# Print the predicted red-ball numbers.
print("预测的红色球号码:", next_features)

The two scripts predict different numbers, which is probably because they use different models. This is just for fun.

Next, build a simulated Double Color Ball game. After a draw, any selected number that matches a drawn number is shown in green.

import tkinter as tk
import random


class DoubleColorBallGame:
    """Tkinter window that simulates a Double Color Ball draw.

    The window shows 33 red-ball buttons and 16 blue-ball buttons. Clicking a
    button toggles the pick; the draw button generates six distinct red
    numbers plus one blue number and paints every matching pick green.

    Attributes are documented where they are created in ``__init__``.
    """

    def __init__(self, master):
        """Build all widgets inside ``master`` (a Tk root or toplevel)."""
        self.master = master
        self.master.title("双色球游戏")
        self.master.geometry("600x400")

        # Button widgets; index i holds the button for number i + 1.
        self.red_balls = []
        self.blue_balls = []

        # Current picks: any number of reds, at most one blue.
        self.selected_red_balls = []
        self.selected_blue_ball = None

        self.create_red_ball_buttons()
        self.create_blue_ball_buttons()
        self.create_draw_button()
        self.create_clear_button()
        self.create_result_label()

        # Remember the platform's own button colour. The literal
        # "SystemButtonFace" is only a valid colour name on Windows, so it
        # cannot be used to restore buttons on other platforms.
        self.default_bg = self.red_balls[0].cget("bg")

    def create_red_ball_buttons(self):
        """Lay out red buttons 1-33 in a grid of three rows of eleven."""
        red_frame = tk.Frame(self.master)
        red_frame.pack(pady=10)

        for i in range(1, 34):
            # i=i binds the current number; a plain closure would see 33.
            button = tk.Button(red_frame, text=str(i), width=2, command=lambda i=i: self.select_red_ball(i))
            button.grid(row=(i - 1) // 11, column=(i - 1) % 11, padx=2, pady=2)
            self.red_balls.append(button)

    def create_blue_ball_buttons(self):
        """Lay out blue buttons 1-16 in a single row."""
        blue_frame = tk.Frame(self.master)
        blue_frame.pack(pady=10)

        for i in range(1, 17):
            button = tk.Button(blue_frame, text=str(i), width=2, command=lambda i=i: self.select_blue_ball(i))
            button.grid(row=0, column=i - 1, padx=2, pady=2)
            self.blue_balls.append(button)

    def create_draw_button(self):
        """Add the button that triggers a draw."""
        draw_frame = tk.Frame(self.master)
        draw_frame.pack(pady=10)

        draw_button = tk.Button(draw_frame, text="开奖", command=self.generate_draw_result)
        draw_button.pack()

    def create_clear_button(self):
        """Add the button that clears picks and the last result."""
        clear_frame = tk.Frame(self.master)
        clear_frame.pack(pady=10)

        clear_button = tk.Button(clear_frame, text="清除", command=self.clear_selection)
        clear_button.pack()

    def create_result_label(self):
        """Add the label that shows the drawn numbers."""
        self.result_label = tk.Label(self.master, text="")
        self.result_label.pack(pady=20)

    def _reset_button_colors(self):
        """Restore every ball button to the default background."""
        for button in self.red_balls + self.blue_balls:
            button.config(bg=self.default_bg)

    def select_red_ball(self, number):
        """Toggle red ball ``number`` in the current picks."""
        if number in self.selected_red_balls:
            self.selected_red_balls.remove(number)
            self.red_balls[number - 1].config(bg=self.default_bg)
        else:
            self.selected_red_balls.append(number)
            self.red_balls[number - 1].config(bg="red")

    def select_blue_ball(self, number):
        """Pick blue ball ``number``, or unpick it if it is already picked."""
        previous = self.selected_blue_ball
        # Only one blue ball can be picked, so whichever one was highlighted
        # before must be cleared before the new pick is painted.
        if previous is not None:
            self.blue_balls[previous - 1].config(bg=self.default_bg)

        if previous == number:
            self.selected_blue_ball = None
        else:
            self.selected_blue_ball = number
            self.blue_balls[number - 1].config(bg="blue")

    def generate_draw_result(self):
        """Draw six distinct reds and one blue, then mark matching picks."""
        red_draw = sorted(random.sample(range(1, 34), 6))
        blue_draw = random.randint(1, 16)

        self.result_label.config(
            text="红球:" + " ".join(str(ball) for ball in red_draw) + "\n蓝球:" + str(blue_draw))

        # Clear the marks left by any previous draw.
        self._reset_button_colors()

        # Matching picks turn green; the other picks keep their selection
        # colour so the buttons still reflect what is selected.
        for number in self.selected_red_balls:
            color = "green" if number in red_draw else "red"
            self.red_balls[number - 1].config(bg=color)

        if self.selected_blue_ball is not None:
            color = "green" if self.selected_blue_ball == blue_draw else "blue"
            self.blue_balls[self.selected_blue_ball - 1].config(bg=color)

    def clear_selection(self):
        """Forget all picks, reset button colours, and blank the result."""
        self.selected_red_balls = []
        self.selected_blue_ball = None

        self._reset_button_colors()

        self.result_label.config(text="")


# Launch the game window only when this file is executed as a script.
if __name__ == "__main__":
    root = tk.Tk()
    game = DoubleColorBallGame(root)
    # Blocks until the window is closed.
    root.mainloop()
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd

# Load the crawled draws. The crawler writes no header row, so header=None
# keeps the first draw as data instead of consuming it as column names.
data = pd.read_csv('双色球开奖结果.csv', header=None, encoding='utf_8_sig')

# Order the draws oldest -> newest by the issue label in column 0, because
# the training pairs below treat row i + 1 as the draw that follows row i.
# NOTE(review): assumes issue labels sort chronologically as strings
# (same-width issue numbers) - confirm against the crawled file.
data = data.sort_values(by=0).reset_index(drop=True)

# Columns 1-7 hold the six red balls followed by the blue ball.
numbers = data.iloc[:, 1:8].values.astype(float)
numbers = torch.tensor(numbers, dtype=torch.float32)

# Scale every column by 33, the largest red-ball number. The blue column is
# scaled by the same factor and later multiplied back by 33 as well.
numbers /= 33.0

# Training pairs: the six red balls of one draw are the input, and the red
# and blue balls of the following draw are the targets.
train_data = numbers[:-1, :6]  # red balls of draw t
train_red_balls = numbers[1:, :6]  # red balls of draw t + 1
train_blue_balls = numbers[1:, 6]  # blue ball of draw t + 1

# 定义预测神经元模型
class PredictNeuronNet(nn.Module):
    """Two independent linear heads that share the same input vector.

    One head produces six values (the red balls); the other produces a
    single value (the blue ball).
    """

    def __init__(self, input_dim):
        """Create both heads for inputs of width ``input_dim``."""
        super().__init__()
        # Red head: input_dim -> 6 outputs.
        self.predict_red_balls = nn.Linear(input_dim, 6)
        # Blue head: input_dim -> 1 output.
        self.predict_blue_ball = nn.Linear(input_dim, 1)

    def forward(self, x):
        """Return the tuple ``(red_balls, blue_ball)`` computed from ``x``."""
        return self.predict_red_balls(x), self.predict_blue_ball(x)

# 训练函数
def train_predict_neuron_model(model, criterion, optimizer, num_epochs):
    """Train ``model`` on the module-level training tensors.

    Every epoch is one full-batch step over ``train_data``; the loss is the
    sum of the red-ball and blue-ball losses. Progress is printed every
    tenth epoch.

    Args:
        model: Network returning a ``(red_balls, blue_ball)`` tuple.
        criterion: Loss function applied to each output separately.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of training steps to run.
    """
    for epoch in range(num_epochs):
        predicted_red, predicted_blue = model(train_data)
        # The blue output is flattened with view(-1) before being compared
        # with the blue-ball target tensor.
        loss = (
            criterion(predicted_red, train_red_balls)
            + criterion(predicted_blue.view(-1), train_blue_balls)
        )

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report only on every tenth epoch.
        if (epoch + 1) % 10:
            continue
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Script entry point: build the model, train it, and print one prediction.
if __name__ == '__main__':
    # The model takes the six red-ball values of a single draw as input.
    input_dim = 6
    model = PredictNeuronNet(input_dim)

    # Mean squared error loss, optimized with plain SGD.
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Train the model.
    num_epochs = 1000
    train_predict_neuron_model(model, criterion, optimizer, num_epochs)

    # Feed the red balls of the last row in `numbers`, shaped (1, 6).
    test_data = numbers[-1, :6].view(1, -1)
    with torch.no_grad():
        normalized_red, normalized_blue = model(test_data)

    # Undo the division by 33 that was applied when the data was loaded.
    red_balls = normalized_red * 33.0
    blue_ball = normalized_blue * 33.0

    print("Predicted numbers for the next period (Red Balls):")
    print(red_balls)
    print("Predicted number for the next period (Blue Ball):")
    print(blue_ball)

Guess you like

Origin blog.csdn.net/weixin_55008315/article/details/131440087