NumPy数据存取与函数

学习笔记手札及单元小结

01
02
03
04
05
06
07
08
09
11
12

数据的CSV文件存取

CSV文件,请在IPython平台运行

import numpy as np  #引入numpy库,模块别名为np

a = np.arange(100).reshape(5,20)

np.savetxt('a.csv',a,fmt='%d',delimiter=',')

a = np.arange(100).reshape(5,20)

np.savetxt('a.csv',a,fmt='%.1f',delimiter=',')

b = np.loadtxt('a.csv',delimiter=',')

b
Out[8]:    #out后面为输出结果
array([[ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
        13., 14., 15., 16., 17., 18., 19.],
       [20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
        33., 34., 35., 36., 37., 38., 39.],
       [40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50., 51., 52.,
        53., 54., 55., 56., 57., 58., 59.],
       [60., 61., 62., 63., 64., 65., 66., 67., 68., 69., 70., 71., 72.,
        73., 74., 75., 76., 77., 78., 79.],
       [80., 81., 82., 83., 84., 85., 86., 87., 88., 89., 90., 91., 92.,
        93., 94., 95., 96., 97., 98., 99.]])

b = np.loadtxt('a.csv',dtype=int,delimiter=',')  #np.int已在NumPy 1.24中移除,直接使用内置int

b
Out[10]: 
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
        56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
        76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
        96, 97, 98, 99]])

多维数据的存取

b = np.loadtxt('a.csv',dtype=int,delimiter=',')  #np.int已在NumPy 1.24中移除,直接使用内置int

b
Out[10]: 
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
        56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75,
        76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
        96, 97, 98, 99]])

a = np.arange(100).reshape(5,10,2)

a.tofile("b.dat",sep=",",format='%d')

a = np.arange(100).reshape(5,10,2)

a.tofile("b.dat",format='%d')

a = np.arange(100).reshape(5,10,2)

a.tofile("b.dat",sep=",",format='%d')

c = np.fromfile("b.dat",dtype=int,sep=",")  #np.int已在NumPy 1.24中移除,直接使用内置int

c
Out[18]: 
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

c = np.fromfile("b.dat",dtype=int,sep=",").reshape(5,10,2)  #np.int已在NumPy 1.24中移除,直接使用内置int
c
Out[20]: 
array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9],
        [10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19]],

       [[20, 21],
        [22, 23],
        [24, 25],
        [26, 27],
        [28, 29],
        [30, 31],
        [32, 33],
        [34, 35],
        [36, 37],
        [38, 39]],

       [[40, 41],
        [42, 43],
        [44, 45],
        [46, 47],
        [48, 49],
        [50, 51],
        [52, 53],
        [54, 55],
        [56, 57],
        [58, 59]],

       [[60, 61],
        [62, 63],
        [64, 65],
        [66, 67],
        [68, 69],
        [70, 71],
        [72, 73],
        [74, 75],
        [76, 77],
        [78, 79]],

       [[80, 81],
        [82, 83],
        [84, 85],
        [86, 87],
        [88, 89],
        [90, 91],
        [92, 93],
        [94, 95],
        [96, 97],
        [98, 99]]])

需要注意,该方法需要读取时知道存入文件时数组的维度和元素类型,a.tofile()和np.fromfile()需要配合使用,可以通过元数据文件来存储额外信息。

NumPy的快捷文件存取


a = np.arange(100).reshape(5,10,2)

np.save("a.npy",a)

b = np.load("a.npy")

b
Out[24]: 
array([[[ 0,  1],
        [ 2,  3],
        [ 4,  5],
        [ 6,  7],
        [ 8,  9],
        [10, 11],
        [12, 13],
        [14, 15],
        [16, 17],
        [18, 19]],

       [[20, 21],
        [22, 23],
        [24, 25],
        [26, 27],
        [28, 29],
        [30, 31],
        [32, 33],
        [34, 35],
        [36, 37],
        [38, 39]],

       [[40, 41],
        [42, 43],
        [44, 45],
        [46, 47],
        [48, 49],
        [50, 51],
        [52, 53],
        [54, 55],
        [56, 57],
        [58, 59]],

       [[60, 61],
        [62, 63],
        [64, 65],
        [66, 67],
        [68, 69],
        [70, 71],
        [72, 73],
        [74, 75],
        [76, 77],
        [78, 79]],

       [[80, 81],
        [82, 83],
        [84, 85],
        [86, 87],
        [88, 89],
        [90, 91],
        [92, 93],
        [94, 95],
        [96, 97],
        [98, 99]]])

NumPy的随机数函数

import numpy as np

a = np.random.rand(3,4,5)

a
Out[3]: 
array([[[0.92126078, 0.72702673, 0.13624911, 0.73092708, 0.42956826],
        [0.95508714, 0.57926769, 0.03424191, 0.59152571, 0.02174399],
        [0.42098051, 0.93879879, 0.84240545, 0.04235163, 0.45768729],
        [0.62887218, 0.85697452, 0.44094573, 0.563129  , 0.993858  ]],

       [[0.55746268, 0.96863765, 0.31039295, 0.53537751, 0.65815568],
        [0.26423492, 0.82525228, 0.87460426, 0.86163507, 0.19211628],
        [0.76989613, 0.56726312, 0.44005911, 0.4682608 , 0.05538174],
        [0.87612542, 0.45498654, 0.1775557 , 0.72461702, 0.70123385]],

       [[0.08351399, 0.06128516, 0.42343067, 0.86004714, 0.66739059],
        [0.25322708, 0.63308245, 0.0344849 , 0.71008877, 0.41591252],
        [0.04722793, 0.95979877, 0.59740886, 0.82616566, 0.71476045],
        [0.83329752, 0.059742  , 0.13737899, 0.88026533, 0.0881806 ]]])

sn = np.random.randn(3,4,5)

sn
Out[5]: 
array([[[-1.68621026, -0.54643652,  0.59114768,  0.55151255,
          0.50860481],
        [ 0.68583257,  1.76492401,  1.10093793,  1.57387225,
         -0.45179255],
        [ 0.22051625, -0.39438724, -1.31568633,  0.02240675,
         -0.06721427],
        [ 3.00847932,  1.22789191,  3.17766278, -0.65571714,
         -0.22554802]],

       [[-1.08297144,  0.12654454,  0.71503543, -3.15202389,
         -0.61358609],
        [ 1.03379153,  0.02344163,  1.22937763, -1.13211814,
         -0.2089044 ],
        [-0.75627669,  0.43724419,  0.57417111,  1.28755106,
          0.65818801],
        [-0.09347696, -0.11455071, -0.20818887,  0.23148983,
         -0.90157041]],

       [[ 0.01811455,  1.44858211, -0.13296883,  1.30496287,
          0.5481228 ],
        [-1.50693645, -0.88008349,  0.53535135, -0.21610831,
          0.83826012],
        [-0.30166713, -0.37950129, -1.1221579 , -2.06424955,
         -1.04543235],
        [ 1.76997155, -0.53683235, -1.70964818,  0.37693068,
         -0.40056297]]])

b = np.random.randint(100,200,(3,4))

b
Out[7]: 
array([[173, 114, 192, 155],
       [136, 166, 193, 135],
       [191, 135, 183, 176]])
b = np.random.randint(100,200,(3,4))

b
Out[9]: 
array([[162, 182, 118, 155],
       [108, 147, 116, 119],
       [108, 197, 131, 125]])

np.random.seed(10)

np.random.randint(100,200,(3,4))
Out[11]: 
array([[109, 115, 164, 128],
       [189, 193, 129, 108],
       [173, 100, 140, 136]])

np.random.seed(10)

np.random.randint(100,200,(3,4))
Out[13]: 
array([[109, 115, 164, 128],
       [189, 193, 129, 108],
       [173, 100, 140, 136]])

函数小试

import numpy as np

a = np.random.randint(100,200,(3,4))

a
Out[3]: 
array([[178, 175, 192, 186],
       [197, 155, 120, 135],
       [119, 105, 176, 109]])

np.random.shuffle(a)

a
Out[5]: 
array([[178, 175, 192, 186],
       [197, 155, 120, 135],
       [119, 105, 176, 109]])


np.random.shuffle(a)#沿第0轴随机重排,直接改变数组a本身

a
Out[9]: 
array([[119, 105, 176, 109],  #看这里,与上面有何不同
       [178, 175, 192, 186],  
       [197, 155, 120, 135]])
import numpy as np

a = np.random.randint(100,200,(3,4))

a
Out[3]: 
array([[192, 140, 102, 136],
       [131, 137, 122, 188],
       [111, 103, 157, 124]])

np.random.permutation(a)
Out[4]: 
array([[111, 103, 157, 124],
       [131, 137, 122, 188],
       [192, 140, 102, 136]])

a 
Out[5]:              #a的数组与上面相同,没有变化
array([[192, 140, 102, 136],
       [131, 137, 122, 188],
       [111, 103, 157, 124]])
import numpy as np

b = np.random.randint(100,200,(8,))

b
Out[3]: array([114, 164, 148, 105, 113, 187, 113, 164])

np.random.choice(b,(3,2),replace=False)
Out[4]: 
array([[105, 164],
       [164, 114],
       [148, 187]])

np.random.choice(b,(3,2),p=b/np.sum(b))  #p=b/np.sum(b):元素值越大,被抽取的概率越高
Out[5]: 
array([[187, 105],
       [164, 164],
       [105, 114]])

均匀分布与正态分布

import numpy as np

u = np.random.uniform(0,10,(3,4))

u
Out[3]: 
array([[3.29156207, 1.68210832, 5.69714857, 5.45456271],
       [4.61777858, 8.88234325, 6.39212963, 5.92899842],
       [3.59607425, 8.80170278, 2.77462772, 9.58831913]])

n = np.random.normal(10,5,(3,4))

n
Out[5]: 
array([[13.7804125 , 12.61610026, 16.7549758 ,  7.36912023],
       [ 8.4167832 ,  6.3346798 , 17.30410627, 20.55554637],
       [11.52397011, 11.74722443, 16.11041637,  8.30900004]])

NumPy的统计函数

import numpy as np

a = np.arange(15).reshape(3,5)

a
Out[3]: 
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

np.sum(a)
Out[4]: 105

np.mean(a,axis=1)
Out[5]: array([ 2.,  7., 12.])

np.mean(a,axis=0)
Out[6]: array([5., 6., 7., 8., 9.])

np.average(a,axis=0,weights=[10,5,1])#求加权平均值,例4.1875=(2*10+7*5+12*1)/(10+5+1)
Out[7]: array([2.1875, 3.1875, 4.1875, 5.1875, 6.1875])

np.std(a) #求标准差
Out[8]: 4.320493798938574

np.var(a) #求方差
Out[9]: 18.666666666666668
import numpy as np

b = np.arange(15,0,-1).reshape(3,5)

b
Out[3]: 
array([[15, 14, 13, 12, 11],
       [10,  9,  8,  7,  6],
       [ 5,  4,  3,  2,  1]])

np.max(b)
Out[4]: 15

np.argmax(b) #扁平化后的下标
Out[5]: 0

np.unravel_index(np.argmax(b),b.shape)#重塑成多维下标
Out[6]: (0, 0)

np.ptp(b)#求最大值与最小值的差
Out[7]: 14

np.median(b) #求中位数
Out[8]: 8.0

NumPy的梯度函数

import numpy as np

a = np.random.randint(0,20,(5))

a
Out[3]: array([14, 19, 10,  8,  5])

np.gradient(a)
Out[4]: array([ 5. , -2. , -5.5, -2.5, -3. ])#-2.=(10-14)/2,存在两侧值;-3.=(5-8)/1,只有一侧值

b = np.random.randint(0,20,(5))

b
Out[6]: array([ 7, 17,  7,  2, 12])

np.gradient(b)
Out[7]: array([10. ,  0. , -7.5,  2.5, 10. ])
import numpy as np

c = np.random.randint(0,50,(3,5))

c
Out[3]: 
array([[22, 31,  9, 24, 37],
       [38, 25, 21, 10, 13],
       [48, 20, 45, 39, 28]])

np.gradient(c)
Out[4]: 
[array([[ 16. ,  -6. ,  12. , -14. , -24. ],  #最外层维度的梯度
        [ 13. ,  -5.5,  18. ,   7.5,  -4.5],
        [ 10. ,  -5. ,  24. ,  29. ,  15. ]]),
 array([[  9. ,  -6.5,  -3.5,  14. ,  13. ],   #第二层维度的梯度
        [-13. ,  -8.5,  -7.5,  -4. ,   3. ],
        [-28. ,  -1.5,   9.5,  -8.5, -11. ]])]

猜你喜欢

转载自blog.csdn.net/zzw1208/article/details/106984745