# Data Analysis with NumPy

# NumPy array operations: selecting rows, columns and individual elements

import numpy as np

# Locations of the two CSV datasets used throughout the examples.
us_file_path = "./US_video_data_numbers.csv"
uk_file_path = "./GB_video_data_numbers.csv"

# Both files are parsed as comma-separated integers. With unpack=True the
# loaded array is transposed, so the file's columns become the rows of t2.
t1 = np.loadtxt(us_file_path, delimiter=",", dtype="int")
t2 = np.loadtxt(uk_file_path, delimiter=",", dtype="int", unpack=True)
print(t1, t2, sep="\n")

# --- Row selection (uncomment a line to try it) ---
# One row:
# print(t2[2])

# Every row from index 2 onwards:
# print(t2[2:])

# Several non-adjacent rows:
# print(t2[[2,8,10]])

# --- Column selection ---
print("*" * 100)
# One column:
# print(t2[:,0])
# A contiguous run of columns:
#print(t2[:,2:])

# Several non-adjacent columns:
# print(t2[:,[0,2]])

# A single element: the value at row index 2, column index 3
# a = t1[2,3]
# print(a)
# print(type(a))

# A rectangular sub-block: rows 2..4 crossed with columns 1..3.
# Slices select the intersection of the chosen rows and columns.
# print(t2[2:5,1:4])

# Individual, non-adjacent points: pairing the index lists element-wise
# picks (0,0), (2,1) and (2,3).
row_idx = [0, 2, 2]
col_idx = [0, 1, 3]
c = t2[row_idx, col_idx]
print(c)

# Replace the NaN values in an array with the mean of their column

import  numpy as np


def fill_ndarry(t1):
    """Replace NaN entries of a 2-D array with the mean of their column.

    The mean of each column is computed over its non-NaN entries only.
    A column that consists entirely of NaN has no such mean and is left
    untouched.

    Args:
        t1: 2-D NumPy array. It is modified in place.

    Returns:
        The same array object that was passed in, after filling.
    """
    for i in range(t1.shape[1]):
        column = t1[:, i]  # basic slicing gives a view, so writes reach t1

        # NaN is the only value that compares unequal to itself.
        nan_mask = column != column
        if not nan_mask.any():
            continue

        valid = column[~nan_mask]
        if valid.size == 0:
            # Nothing to average; taking the mean of an empty slice would
            # only emit a RuntimeWarning and write NaN back.
            continue

        column[nan_mask] = valid.mean()
    return t1


if __name__ == '__main__':
    # Demo input: a 3x4 float matrix holding 0..11.
    t1 = np.arange(12, dtype="float").reshape(3, 4)

    # Blank out the last two cells of the middle row.
    t1[1, 2:] = np.nan
    print(t1)

    # Fill the gaps and show the result.
    t2 = fill_ndarry(t1)
    print(t2)

# Plot a histogram of the comment counts

import numpy as np
from  matplotlib import  pyplot as plt

us_file_path ="./US_video_data_numbers.csv"
uk_file_path ="./GB_video_data_numbers.csv"

# Load the US dataset as a comma-separated integer matrix.
t_us = np.loadtxt(us_file_path, dtype="int", delimiter=",")

# The last column is taken as the comment count.
t_us_comments = t_us[:, -1]

# Keep only values up to 5000 so the long tail does not flatten the plot.
t_us_comments = t_us_comments[t_us_comments <= 5000]

# Bin width.
d = 50

# Number of bins = value range / bin width. Clamp to at least one bin:
# when the range is smaller than d the division yields 0, and plt.hist
# rejects a non-positive bin count.
bin_mins = max(1, (t_us_comments.max() - t_us_comments.min()) // d)

# Draw the histogram.
plt.figure(figsize=(16, 8), dpi=80)

plt.hist(t_us_comments, bin_mins)

plt.show()

# Plot a scatter chart of comments against likes

import numpy as np
from  matplotlib import  pyplot as plt

us_file_path = "./US_video_data_numbers.csv"
uk_file_path = "./GB_video_data_numbers.csv"

# Load the GB dataset as a comma-separated integer matrix.
t_uk = np.loadtxt(uk_file_path, delimiter=",", dtype="int")

# Keep only the rows whose column-1 value (used as the like count below)
# is at most 500000.
t_uk = t_uk[t_uk[:, 1] <= 500_000]

# x axis: column 1 (likes); y axis: last column (comments).
t_uk_like = t_uk[:, 1]
t_uk_comment = t_uk[:, -1]

plt.figure(dpi=80, figsize=(16, 8))
plt.scatter(x=t_uk_like, y=t_uk_comment)
plt.show()

# Append a label column to each array, then stack the two arrays

import numpy as np

us_file_path = "./US_video_data_numbers.csv"
uk_file_path = "./GB_video_data_numbers.csv"

# Load both datasets as comma-separated integer matrices.
t1 = np.loadtxt(us_file_path, delimiter=",", dtype="int")
t2 = np.loadtxt(uk_file_path, delimiter=",", dtype="int")

# One-column integer label arrays, one entry per row:
# all zeros for the US rows, all ones for the GB rows.
zero_data = np.zeros((t1.shape[0], 1), dtype=int)
ones_zero = np.ones((t2.shape[0], 1), dtype=int)

# Attach each label column on the right-hand side of its dataset.
t1 = np.concatenate((t1, zero_data), axis=1)
t2 = np.concatenate((t2, ones_zero), axis=1)

# Stack the two labelled datasets on top of each other.
final_data = np.concatenate((t1, t2), axis=0)

print(final_data)

# "You may also like" (leftover web-page footer; not part of the script)