python学习-NumPy基础

1、NumPy 创建及类型转换

import numpy as np

# Build an array from a range and convert it back to a Python list.
np.arange(10)
# array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
list(np.arange(10))  # NumPy array -> list (the elements stay NumPy scalars)
np.arange(10).tolist()  # NumPy array -> list of plain Python ints
# [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

data = [6, 7, 8, 0, 1]  # Python list -> NumPy array
arr = np.array(data)
# array([6, 7, 8, 0, 1])
arr.ndim   # number of dimensions: 1
arr.shape  # length along each dimension: (5,)
arr.dtype  # element type, inferred from the data (an integer dtype here)

# Create arrays whose initial value is 0.
np.zeros((2, 4))
np.empty((2, 4))  # allocated but NOT initialized; the contents are arbitrary

# Setting and converting the dtype of an array.
# np.bytes_ is used instead of np.string_, which was removed in NumPy 2.0.
numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
# array([b'1.25', b'-9.6', b'42'], dtype='|S4')
numeric_strings.astype(float)  # parse the byte strings as floats (new array)
# array([ 1.25, -9.6 , 42.  ])

2、NumPy运算

# Arithmetic between equal-shaped arrays, and between an array and a scalar,
# is applied element by element.
arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr + arr  # element-wise sum
arr - arr  # element-wise difference (all zeros here)
arr * arr  # element-wise product, not a matrix product
1/arr  # the scalar is combined with every element
arr **2  # element-wise square
arr ** 0.5  # element-wise square root

arr1 = np.array([[0., 4., 1.], [7., 2., 12.]])
arr >arr1  # element-wise comparison; the result is a boolean array

3、NumPy索引和切片

arr = np.arange(10)  # array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[5]  # 5 -- the element at index 5
arr[5:8]  # elements at indices 5, 6 and 7 (the stop index 8 is excluded)
arr[5:8] = 10  # assign 10 to indices 5..7 -> array([ 0,  1,  2,  3,  4, 10, 10, 10,  8,  9])

# A basic slice is a view of the original array, not a copy: writing through
# the slice changes the original data.

arr[5:8].copy()  # take an explicit copy to get an independent array

# Two-dimensional arrays
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[1][0]
arr2d[1, 0]  # equivalent to arr2d[1][0]
arr2d[:2]  # slice along the rows: the first 2 rows
# array([[1, 2, 3],[4, 5, 6]])
arr2d[:2, 1:]  # slice rows and columns independently
# array([[2, 3],[5, 6]])
arr2d[:, :1]  # a bare ":" selects the whole axis
# array([[1],[4],[7]])

4、布尔索引

names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)
# Example output (the values are random and differ on every run):
# array([[-1.4940666 ,  1.18154707, -1.15719113, -0.96762363],
#        [ 0.43512052,  0.84789969, -0.51722385, -0.21203284],
#        [-1.06864135, -2.30528408, -1.22800202,  1.05406909],
#        [-1.27620432, -0.09847922, -0.07406542,  0.66546799],
#        [ 0.79388547, -0.47114695, -0.31333488,  0.25108373],
#        [-0.50338099,  0.50331694, -0.56595601, -0.48632843],
#        [ 0.48545659,  0.0084995 ,  1.15160201, -0.10719972]])
names == 'Bob'  # element-wise comparison produces a boolean mask
# array([ True, False, False,  True, False, False, False])
data[names == 'Bob']  # rows of data whose mask entry is True
# array([[-1.4940666 ,  1.18154707, -1.15719113, -0.96762363],
#        [-1.27620432, -0.09847922, -0.07406542,  0.66546799]])
data[names == 'Bob', 2:]  # the same rows, columns from index 2 onwards
names != 'Bob'
data[~(names == 'Bob')]  # ~ negates the mask; same rows as names != 'Bob'
# Masks can be combined with & (and) and | (or).
data[data < 0] = 0  # set every value below 0 to 0
data[names != 'Joe'] = 7  # set every row whose name is not 'Joe' to 7

5、数组转置和交换

# 16 consecutive integers arranged as a 2 x 2 x 4 array.
arr = np.arange(16).reshape((2, 2, 4))
arr.T # transpose: reverses the order of the axes
arr.transpose((1, 0, 2))  # swap the first and second axes; the third stays in place
arr.swapaxes(1,2)# swap the second and third axes

6、通用数组函数、一元函数（P107）

# Unary universal functions operate on every element of one array.
arr = np.arange(10)
np.sqrt(arr)  # element-wise square root
np.exp(arr)  # element-wise exponential

# Binary universal functions combine two arrays element by element.
x = np.random.randn(8)
y = np.random.randn(8)
np.maximum(x,y)  # element-wise maximum of x and y

arr = np.random.randn(7) * 5
remainder, whole_part = np.modf(arr)  # fractional and integral parts, as two arrays

np.sqrt(arr,arr) # the second argument is the output array, so the result overwrites arr; negative entries become nan

7、数组操作的逻辑表达条件

xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])
[(x if c else y) for x,y,c in zip(xarr,yarr,cond)]  # pure-Python version: take x where cond is True, otherwise y
# Equivalent vectorized expression:
np.where(cond, xarr, yarr)
# Replace values in an array according to a condition.
arr = np.random.randn(4, 4)
arr>0  # boolean mask of the positive entries
np.where(arr>0, 2, -2)  # 2 where positive, -2 elsewhere
np.where(arr>0, 2, arr)  # 2 where positive, original value elsewhere

8、数学和统计方法

# The integers 0..9 arranged as 2 rows by 5 columns.
arr = np.arange(10).reshape(2,5)
arr
arr.mean()  # mean of all elements
arr.sum()  # sum of all elements
np.mean(arr)  # function form of arr.mean()
arr.mean(axis=0)  # mean along axis 0: one value per column
arr.cumsum()  # cumulative sum over the flattened array

# Number of values greater than 2 (True counts as 1 when summed).
(arr>2).sum()

bools = np.array([False, False, True, False])
bools.any() # True if at least one element is True
bools.all()# True only if every element is True

9、数组唯一取值

names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)  # sorted unique values, returned as an array
sorted(set(names))# pure-Python way to get the same values (returns a list)

# Other set operations in the numpy namespace:
# intersect1d(x, y)  - sorted values present in both x and y
# union1d(x, y)      - sorted union of the values
# in1d(x, y)         - boolean array: is each element of x contained in y (newer NumPy versions provide isin for this)
# setdiff1d(x, y)    - values in x that are not in y
# setxor1d(x, y)     - values in exactly one of x and y

10、使用数组输入输出

arr = np.arange(10)
np.save('some_array', arr)  # write one array in binary .npy format
np.load('some_array.npy')  # read it back; note the .npy extension on the file name
np.savez('array_archive.npz', a=arr, b=arr)# save multiple arrays in an uncompressed archive using np.savez
arch = np.load('array_archive.npz')  # the archive is indexed by the keyword names used when saving
arch['b']
np.savez_compressed('arrays_compressed.npz', a=arr, b=arr)  # same as savez, but the archive is compressed

11、线性代数

# Matrix multiplication
x = np.array([[1., 2., 3.], [4., 5., 6.]])
x @ np.ones(3)  # (2, 3) matrix times a length-3 vector -> array([ 6., 15.])

# numpy.linalg implements the standard linear-algebra operations.
from numpy.linalg import inv, qr
# Related functions, available as np.diag, np.dot and np.trace:
#   diag  - the diagonal of a matrix
#   dot   - inner (dot) product
#   trace - sum of the diagonal elements

12、随机数

np.random.normal(size=(4, 4))  # 4 x 4 samples from the standard normal distribution

from random import normalvariate

# Sample count for the timing comparison below (any large value works).
N = 1000000

# The two timings are IPython magics, not Python statements; run them in
# IPython / Jupyter to compare the pure-Python loop with the vectorized call:
# %timeit samples = [normalvariate(0, 1) for _ in range(N)]
# %timeit np.random.normal(size=N)
np.random.seed(1234)  # seed NumPy's global random generator
rng = np.random.RandomState(1234)  # independent generator with its own seed; keep the reference to use it
# Random walk
# 1: pure-Python implementation using the standard-library random module
import random  # random.randint is used below

position = 0
walk = [position]  # positions visited, starting at 0
steps = 1000
for _ in range(steps):
    # randint(0, 1) includes both end points: 1 -> step up, 0 -> step down
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)
plt.plot(walk[:100])  # plot the first 100 positions; NOTE(review): plt is presumably matplotlib.pyplot -- confirm it is imported before this line
# 2: vectorized random walk
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)  # 0 or 1 for each step (the upper bound 2 is exclusive)
steps = np.where(draws > 0, 1, -1)  # map 1 -> +1 and 0 -> -1
walk = steps.cumsum()  # position after each step

(np.abs(walk) >= 10).argmax() # index of the first step at which |position| >= 10 (argmax returns the first True; it is 0 if there is none)
# 3: simulate 5000 random walks at once
nwalks = 5000
nsteps = 1000
draws = np.random.randint(0, 2, size=(nwalks, nsteps))  # 0 or 1
steps = np.where(draws > 0, 1, -1)
walks = steps.cumsum(axis=1)  # one walk per row, accumulated along the steps

walks.min()  # minimum position over all walks
hits30 = (np.abs(walks) >= 30).any(axis=1)  # per walk: did |position| ever reach 30?
hits30.sum()  # number of walks that reached 30

# For the walks that reached 30: the step index of the first crossing
# (argmax returns the position of the first True in each row).
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(axis=1)

# Alternative step distribution: normally distributed step sizes
# (mean 0, standard deviation 0.25) instead of +/-1 coin flips.
steps = np.random.normal(loc=0, scale=0.25, size=(nwalks, nsteps))

python学习-NumPy基础

猜你喜欢