小余同学的Hello Machine Learning

numpy()的使用

import numpy
numpy.__version__
'1.18.1'
import numpy as np
np.__version__
'1.18.1'
num=[i for i in range(10)]
num
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
num[5]=100
num
[0, 1, 2, 3, 4, 100, 6, 7, 8, 9]
num[5]="Machine Learning"
num
[0, 1, 2, 3, 4, 'Machine Learning', 6, 7, 8, 9]
import array
arr=array.array('i',[i for i in range(10)] )
arr
array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
arr[5]=100
arr
array('i', [0, 1, 2, 3, 4, 100, 6, 7, 8, 9])
arr[5]="Machine Learning"
arr
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)

<ipython-input-11-2fdd5eb623eb> in <module>
----> 1 arr[5]="Machine Learning"
      2 arr


TypeError: an integer is required (got type str)

numpy.array()

npyarr=np.array([i for i in range (10)])
npyarr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
npyarr[5]=100
npyarr
array([  0,   1,   2,   3,   4, 100,   6,   7,   8,   9])
npyarr[5]="Machine Learning"
npyarr
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-16-a4ea69df9ec9> in <module>
----> 1 npyarr[5]="Machine Learning"
      2 npyarr


ValueError: invalid literal for int() with base 10: 'Machine Learning'
npyarr.dtype
dtype('int32')
npyarr2=np.array([1,2,3.1])
npyarr2.dtype
dtype('float64')

其他创建numpy的方法

np.zeros(10)
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
np.zeros(10,dtype=int)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
np.zeros((3,5),dtype=int)
array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])
np.zeros(shape=(3,5),dtype=int)
array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]])
np.ones((3,5))
array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])
np.full((3,5),520)
array([[520, 520, 520, 520, 520],
       [520, 520, 520, 520, 520],
       [520, 520, 520, 520, 520]])
np.full(shape=(3,5),fill_value=520.0)
array([[520., 520., 520., 520., 520.],
       [520., 520., 520., 520., 520.],
       [520., 520., 520., 520., 520.]])

np.arange()相当于range

[i for i in range(1,10,2)]
[1, 3, 5, 7, 9]
np.arange(1,10,2)
array([1, 3, 5, 7, 9])
np.arange(1,5,0.5)
array([1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])

linspace

np.linspace(0,20,11)
array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14., 16., 18., 20.])
np.linspace(0,20,10)
array([ 0.        ,  2.22222222,  4.44444444,  6.66666667,  8.88888889,
       11.11111111, 13.33333333, 15.55555556, 17.77777778, 20.        ])

random

前闭后开,取不到后面的元素。
第三个参数表示size

np.random.randint(0,10)
6
np.random.randint(0,10,size=5)
array([0, 2, 5, 7, 5])
np.random.randint(0,10,size=(3,5))
array([[9, 8, 1, 9, 2],
       [2, 5, 8, 8, 2],
       [6, 8, 4, 7, 2]])
np.random.randint(0,10,size=(3,5))
array([[7, 1, 4, 4, 5],
       [0, 9, 5, 3, 3],
       [6, 6, 6, 5, 4]])

np.random.seed(666)设置随机种子,可以重现随机数

np.random.seed(666)
np.random.randint(0,10,size=(3,5))
array([[2, 6, 9, 4, 3],
       [1, 0, 8, 7, 5],
       [2, 5, 5, 4, 8]])
np.random.seed(666)
np.random.randint(0,10,size=(3,5))
array([[2, 6, 9, 4, 3],
       [1, 0, 8, 7, 5],
       [2, 5, 5, 4, 8]])
np.random.random()

0.942538963698833
np.random.random(10)
array([0.07473949, 0.27646251, 0.4675855 , 0.31581532, 0.39016259,
       0.26832981, 0.75366384, 0.66673747, 0.87287954, 0.52109719])
np.random.random((3,4))
array([[0.75020425, 0.32940234, 0.29130197, 0.00103619],
       [0.6361797 , 0.97933558, 0.91236279, 0.39925165],
       [0.40322917, 0.33454934, 0.72306649, 0.96832961]])

np.random.normal()生成符合正态分布的随机数

np.random.normal()
0.3139856301975447
np.random.normal(3,5)
4.981928369707971
np.random.normal(3,5,size=10)
array([ 5.88282919,  2.64902965,  7.56252181,  8.02352483,  5.09299645,
       -4.64512271,  7.6243766 ,  3.91527143,  4.7271748 , -1.06577151])
np.random.normal(3,5,(3,5))
array([[ 8.31626911,  4.29331924,  5.36425533,  8.15984898,  2.19771727],
       [ 3.02961764,  0.32736922,  8.75850414, -3.72490541,  1.19403795],
       [-2.75734112,  5.46123875,  4.85432136,  0.34329194,  4.42855501]])
np.random.normal?

numpy.array()的基本操作

import numpy as np
x=np.arange(10)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
X=np.arange(15).reshape(3,5)
X
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
x.ndim
1
X.ndim
2
x.shape
(10,)
X.shape
(3, 5)
x.size
10
X.size
15

numpy.array()的访问

x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x[-1]
9
X
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
X[0][0]
0
X[2,3]
13
x[:5]
array([0, 1, 2, 3, 4])
x[::2]
array([0, 2, 4, 6, 8])
X
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

取前两行前三列

X[:2,:3]
array([[0, 1, 2],
       [5, 6, 7]])

下面为错误示范

X[:2][:3]
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
X[:2,::2]
array([[0, 2, 4],
       [5, 7, 9]])
X[::-1,::-1]
array([[14, 13, 12, 11, 10],
       [ 9,  8,  7,  6,  5],
       [ 4,  3,  2,  1,  0]])
X[0]
array([0, 1, 2, 3, 4])
X[0,:]
array([0, 1, 2, 3, 4])
X[0,:].ndim
1
X[:,0]
array([ 0,  5, 10])

subx 与 X 的变化是同步的(切片得到的是视图),调用copy()之后就不同步

subx=X[:2,:3]
subx
array([[0, 1, 2],
       [5, 6, 7]])
subx[0,0]=100
subx
array([[100,   1,   2],
       [  5,   6,   7]])
X
array([[100,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14]])

调用copy()

subx=X[:2,:3].copy()
subx
array([[100,   1,   2],
       [  5,   6,   7]])
subx[0,0]=520
subx
array([[520,   1,   2],
       [  5,   6,   7]])
X
array([[100,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14]])

reshape()改变矩阵(一维变二维)

x.shape
(10,)
x.reshape(2,5)
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

reshape()不改变原本的矩阵,要想保留改变后的矩阵,必须重新赋值

a=x.reshape(2,5)
a
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

reshape()函数中取-1,自动推算该维度的个数。一维数组和只有一行的二维数组是不一样的

b=x.reshape(2,-1)
b
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
b=x.reshape(10,-1)
b
array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
b=x.reshape(1,10)
b
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
b.ndim
2
x.ndim
1

合并操作

x=np.array([1,2,3])
y=np.array([3,2,1])
np.concatenate([x,y])
array([1, 2, 3, 3, 2, 1])
z=np.array([666,666,666])
np.concatenate([x,y,z])
array([  1,   2,   3,   3,   2,   1, 666, 666, 666])
A=np.array([[1,2,3],
           [3,2,1]])
A
array([[1, 2, 3],
       [3, 2, 1]])

二维数组拼接,默认axis=0,即增加元组(拼接在下面)

axis=1表示增加属性(拼接在右边)

np.concatenate([A,A])
array([[1, 2, 3],
       [3, 2, 1],
       [1, 2, 3],
       [3, 2, 1]])
np.concatenate([A,A],axis=1)
array([[1, 2, 3, 1, 2, 3],
       [3, 2, 1, 3, 2, 1]])

concatenate()只能将维数相同的矩阵进行拼接

np.concatenate([A,z])
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-121-abdc54b54f98> in <module>
----> 1 np.concatenate([A,z])


<__array_function__ internals> in concatenate(*args, **kwargs)


ValueError: all the input arrays must have same number of dimensions, but the array at index 0 has 2 dimension(s) and the array at index 1 has 1 dimension(s)
np.concatenate([A,z.reshape(1,-1)])
array([[  1,   2,   3],
       [  3,   2,   1],
       [666, 666, 666]])
A2=np.concatenate([A,z.reshape(1,-1)])
A2
array([[  1,   2,   3],
       [  3,   2,   1],
       [666, 666, 666]])

vstack()垂直方向叠加,hstack()水平方向,容错性更好

np.vstack([A,z])
array([[  1,   2,   3],
       [  3,   2,   1],
       [666, 666, 666]])
B=np.full([2,2],100)
B
array([[100, 100],
       [100, 100]])
np.hstack([A,B])
array([[  1,   2,   3, 100, 100],
       [  3,   2,   1, 100, 100]])

分割操作 split(数组,[分割点])

x=np.arange(10)
x
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
x1,x2,x3=np.split(x,[3,7])
x1
array([0, 1, 2])
x2
array([3, 4, 5, 6])
x3
array([7, 8, 9])
x1,x2=np.split(x,[5])
x1
array([0, 1, 2, 3, 4])
x2
array([5, 6, 7, 8, 9])

分割:二维

A=np.arange(16).reshape(4,4)
A
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
A1,A2=np.split(A,[2])
A1
array([[0, 1, 2, 3],
       [4, 5, 6, 7]])
A2
array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])
A1,A2=np.split(A,[2],axis=1)
A1
array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])
A2
array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])

vsplit()、hsplit()

higher,lower=np.vsplit(A,[2])
lower
array([[ 8,  9, 10, 11],
       [12, 13, 14, 15]])
higher
array([[0, 1, 2, 3],
       [4, 5, 6, 7]])
left,right=np.hsplit(A,[2])
left
array([[ 0,  1],
       [ 4,  5],
       [ 8,  9],
       [12, 13]])
right
array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])
data=np.arange(16).reshape(4,4)
data
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
x,y=np.hsplit(data,[-1])
x
array([[ 0,  1,  2],
       [ 4,  5,  6],
       [ 8,  9, 10],
       [12, 13, 14]])
y
array([[ 3],
       [ 7],
       [11],
       [15]])

转成向量

y[:,0]
array([ 3,  7, 11, 15])

numpy.array()中的运算

n=10
L=[i for i in range(n)]
L*2
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
%%time
A=[]
for e in L:
    A.append(2*e)
A
Wall time: 1e+03 µs





[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
%%time
A=[e*2 for e in L]
A
Wall time: 0 ns





[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
import numpy as np
L=np.arange(n)
%%time
A=np.array(2*e for e in L)
A
Wall time: 1 ms





array(<generator object <genexpr> at 0x0000020FD749A248>, dtype=object)
%%time
2*L
Wall time: 0 ns





array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])
A
array(<generator object <genexpr> at 0x0000020FD749A248>, dtype=object)
n=10
L=np.arange(n)
2*L
array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

Universal Function

X=np.arange(1,16).reshape(3,5)
X
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])
X+1
array([[ 2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16]])
X-1
array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])
X*2
array([[ 2,  4,  6,  8, 10],
       [12, 14, 16, 18, 20],
       [22, 24, 26, 28, 30]])
X/2
array([[0.5, 1. , 1.5, 2. , 2.5],
       [3. , 3.5, 4. , 4.5, 5. ],
       [5.5, 6. , 6.5, 7. , 7.5]])
X//2
array([[0, 1, 1, 2, 2],
       [3, 3, 4, 4, 5],
       [5, 6, 6, 7, 7]], dtype=int32)
import numpy as np
n=10
L=np.arange(n)
X=np.arange(1,16).reshape(3,5)
X
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])
np.abs(X)
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15]])
np.sin(X)
array([[ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427],
       [-0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111],
       [-0.99999021, -0.53657292,  0.42016704,  0.99060736,  0.65028784]])
np.cos(X)
array([[ 0.54030231, -0.41614684, -0.9899925 , -0.65364362,  0.28366219],
       [ 0.96017029,  0.75390225, -0.14550003, -0.91113026, -0.83907153],
       [ 0.0044257 ,  0.84385396,  0.90744678,  0.13673722, -0.75968791]])
np.tan(X)
array([[ 1.55740772e+00, -2.18503986e+00, -1.42546543e-01,
         1.15782128e+00, -3.38051501e+00],
       [-2.91006191e-01,  8.71447983e-01, -6.79971146e+00,
        -4.52315659e-01,  6.48360827e-01],
       [-2.25950846e+02, -6.35859929e-01,  4.63021133e-01,
         7.24460662e+00, -8.55993401e-01]])

exp(X)取e的x次方

np.exp(X)
array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01,
        1.48413159e+02],
       [4.03428793e+02, 1.09663316e+03, 2.98095799e+03, 8.10308393e+03,
        2.20264658e+04],
       [5.98741417e+04, 1.62754791e+05, 4.42413392e+05, 1.20260428e+06,
        3.26901737e+06]])

a的x次方

np.power(3,X)
array([[       3,        9,       27,       81,      243],
       [     729,     2187,     6561,    19683,    59049],
       [  177147,   531441,  1594323,  4782969, 14348907]], dtype=int32)
3**X
array([[       3,        9,       27,       81,      243],
       [     729,     2187,     6561,    19683,    59049],
       [  177147,   531441,  1594323,  4782969, 14348907]], dtype=int32)
np.log(X)
array([[0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791],
       [1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509],
       [2.39789527, 2.48490665, 2.56494936, 2.63905733, 2.7080502 ]])
np.log2(X)
array([[0.        , 1.        , 1.5849625 , 2.        , 2.32192809],
       [2.5849625 , 2.80735492, 3.        , 3.169925  , 3.32192809],
       [3.45943162, 3.5849625 , 3.70043972, 3.80735492, 3.9068906 ]])
np.log10(X)
array([[0.        , 0.30103   , 0.47712125, 0.60205999, 0.69897   ],
       [0.77815125, 0.84509804, 0.90308999, 0.95424251, 1.        ],
       [1.04139269, 1.07918125, 1.11394335, 1.14612804, 1.17609126]])

矩阵运算

A=np.arange(4).reshape(2,2)
A
array([[0, 1],
       [2, 3]])
B=np.full((2,2),10)
B
array([[10, 10],
       [10, 10]])
A+B
array([[10, 11],
       [12, 13]])
A-B
array([[-10,  -9],
       [ -8,  -7]])

A*B、A/B是矩阵对应元素相乘、除,不是矩阵相乘

A*B
array([[ 0, 10],
       [20, 30]])
A/B
array([[0. , 0.1],
       [0.2, 0.3]])

A.dot(B)矩阵相乘

A.dot(B)
array([[10, 10],
       [50, 50]])

矩阵的转置A.T

A.T
array([[0, 2],
       [1, 3]])

矩阵和向量的运算

v=np.array([1,2])
np.vstack([v]*A.shape[0])
array([[1, 2],
       [1, 2]])
np.vstack([v]*2)
array([[1, 2],
       [1, 2]])
np.vstack([v]*A.shape[0])+A
array([[1, 3],
       [3, 5]])

np.tile(v,(a,b))表示向量的堆叠

a表示沿行方向(竖直方向,axis=0)的堆叠次数,b表示沿列方向(水平方向,axis=1)的堆叠次数

np.tile(v,(2,1))
array([[1, 2],
       [1, 2]])
np.tile(v,(2,1))+A
array([[1, 3],
       [3, 5]])

向量和矩阵相乘

v
array([1, 2])
A
array([[0, 1],
       [2, 3]])
v*A
array([[0, 2],
       [2, 6]])
v.dot(A)
array([4, 7])
A.dot(v)
array([2, 8])

逆矩阵np.linalg.inv(A),A必须是方阵n*n

np.linalg.inv(A)
array([[-1.5,  0.5],
       [ 1. ,  0. ]])
invA=np.linalg.inv(A)
A.dot(invA)
array([[1., 0.],
       [0., 1.]])
invA.dot(A)
array([[1., 0.],
       [0., 1.]])

伪逆矩阵

X=np.arange(16).reshape(2,8)
X
array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15]])
np.linalg.inv(X)
---------------------------------------------------------------------------

LinAlgError                               Traceback (most recent call last)

<ipython-input-49-47889a8f1529> in <module>
----> 1 np.linalg.inv(X)


<__array_function__ internals> in inv(*args, **kwargs)


E:\Anaconda\lib\site-packages\numpy\linalg\linalg.py in inv(a)
    540     a, wrap = _makearray(a)
    541     _assert_stacked_2d(a)
--> 542     _assert_stacked_square(a)
    543     t, result_t = _commonType(a)
    544 


E:\Anaconda\lib\site-packages\numpy\linalg\linalg.py in _assert_stacked_square(*arrays)
    211         m, n = a.shape[-2:]
    212         if m != n:
--> 213             raise LinAlgError('Last 2 dimensions of the array must be square')
    214 
    215 def _assert_finite(*arrays):


LinAlgError: Last 2 dimensions of the array must be square
pinvX=np.linalg.pinv(X)
pinvX
array([[-1.35416667e-01,  5.20833333e-02],
       [-1.01190476e-01,  4.16666667e-02],
       [-6.69642857e-02,  3.12500000e-02],
       [-3.27380952e-02,  2.08333333e-02],
       [ 1.48809524e-03,  1.04166667e-02],
       [ 3.57142857e-02, -1.04083409e-17],
       [ 6.99404762e-02, -1.04166667e-02],
       [ 1.04166667e-01, -2.08333333e-02]])
X.dot(pinvX)
array([[ 1.00000000e+00, -2.49800181e-16],
       [ 0.00000000e+00,  1.00000000e+00]])
pinvX.shape
(8, 2)

聚合运算

import numpy as np
L=np.random.random(100)
sum(L)
47.25432370057043
np.sum(L)
47.25432370057044
big_array=np.random.rand(100000)
%timeit sum(big_array)
%timeit np.sum(big_array)
19.2 ms ± 739 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
62.8 µs ± 4.41 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)

numpy能够大大提高效率,节约运算时间

np.min(big_array)
4.1163388675435897e-07
np.max(big_array)
0.9999971388067384
big_array.sum()
49972.7561181783
big_array.min()
4.1163388675435897e-07
big_array.max()
0.9999971388067384

二维聚合运算

X=np.arange(16).reshape(4,-1)
X
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
np.sum(X)
120

np.sum(X,axis=0)计算每一列的和;np.sum(X,axis=1)计算每一行的和

np.sum(X,axis=0)
array([24, 28, 32, 36])
np.max(X)
15
X.sum()
120

numpy.prod(X),表示X矩阵中的每个元素相乘;np.mean()求平均值;np.median()求中位数(注意:下面np.prod(X+1)的真实结果是16!=20922789888000,输出2004189184是int32溢出后的值)

np.prod(X)
0
np.prod(X+1)
2004189184
np.mean(X)
7.5
np.median(X)
7.5
v=np.array([1,1,2,2,10])
np.mean(v)
3.2
np.median(v)
2.0

相比于平均数,在有样本差距比较大的情况下,中位数描述整体更好

np.percentile(big_array,50)百分位点

np.percentile(big_array,50)
0.4987236949045198
np.percentile(big_array,100)
0.9999971388067384
for percent in [0,25,50,75,100]:
    print(np.percentile(big_array,q=percent)) 
4.1163388675435897e-07
0.25178892851738666
0.4987236949045198
0.7483803833973868
0.9999971388067384

np.var(big_array)算方差,np.std(big_array)算标准差

np.var(big_array)
0.08304420178640745
np.std(big_array)
0.28817390892724387
x=np.random.normal(0,1,size=1000000)
np.mean(x)
-0.0009100538882577226
np.var(x)
1.001133568111869

索引

import numpy as np
x=np.random.normal(0,1,size=1000000)
np.min(x)
-4.826865639176192
np.argmin(x)
949594
x[949594]
-4.826865639176192
np.argmax(x)
412350
x[412350]
4.687002157847309
np.max(x)
4.687002157847309

排序和使用索引

x=np.arange(16)
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

np.random.shuffle(x)乱序操作

np.random.shuffle(x)
x
array([ 7, 15,  9, 11,  1, 10,  3,  6,  2, 13,  8,  0,  5,  4, 14, 12])
np.sort(x)
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
x
array([ 7, 15,  9, 11,  1, 10,  3,  6,  2, 13,  8,  0,  5,  4, 14, 12])
x.sort()
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

np.sort(x)排序结果不保存在x中;x.sort()排序结果保存在x中

X=np.random.randint(10,size=(4,4))
X
array([[3, 5, 8, 7],
       [7, 1, 0, 9],
       [0, 9, 5, 3],
       [7, 1, 0, 0]])
np.sort(X)
array([[3, 5, 7, 8],
       [0, 1, 7, 9],
       [0, 3, 5, 9],
       [0, 0, 1, 7]])

axis=1,对每一行进行排序(默认);axis=0,对每一列进行排序

np.sort(X,axis=1)
array([[3, 5, 7, 8],
       [0, 1, 7, 9],
       [0, 3, 5, 9],
       [0, 0, 1, 7]])
np.sort(X,axis=0)
array([[0, 1, 0, 0],
       [3, 1, 0, 3],
       [7, 5, 5, 7],
       [7, 9, 8, 9]])
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
np.random.shuffle(x)
x
array([ 9,  6, 13, 14,  7,  0, 10, 11,  8,  1, 15,  3, 12,  4,  2,  5])
np.argsort(x)
array([ 5,  9, 14, 11, 13, 15,  1,  4,  8,  0,  6,  7, 12,  2,  3, 10],
      dtype=int64)

以上是排序后的结果对应的索引值

np.partition(x,k):以第k小的元素为界进行划分,左边的元素都不大于它,右边的元素都不小于它(类似快排中的一次划分,并不是完整排序)

np.random.shuffle(x)
x
array([13, 15,  9, 12, 10, 14,  7,  6,  3,  8,  0,  1,  5,  2, 11,  4])
np.partition(x,3)
array([ 1,  0,  2,  3,  4,  5,  6, 15,  7,  8, 14, 10, 12,  9, 11, 13])
np.argpartition(x,3)
array([11, 10, 13,  8, 15, 12,  7,  1,  6,  9,  5,  4,  3,  2, 14,  0],
      dtype=int64)
X
array([[3, 5, 8, 7],
       [7, 1, 0, 9],
       [0, 9, 5, 3],
       [7, 1, 0, 0]])
np.argsort(X,axis=1)
array([[0, 1, 3, 2],
       [2, 1, 0, 3],
       [0, 3, 2, 1],
       [2, 3, 1, 0]], dtype=int64)
np.argsort(X,axis=0)
array([[2, 1, 1, 3],
       [0, 3, 3, 2],
       [1, 0, 2, 0],
       [3, 2, 0, 1]], dtype=int64)
np.sort(X,axis=1)
array([[3, 5, 7, 8],
       [0, 1, 7, 9],
       [0, 3, 5, 9],
       [0, 0, 1, 7]])
np.sort(X,axis=0)
array([[0, 1, 0, 0],
       [3, 1, 0, 3],
       [7, 5, 5, 7],
       [7, 9, 8, 9]])
np.argpartition(X,2,axis=1)
array([[0, 1, 3, 2],
       [2, 1, 0, 3],
       [0, 3, 2, 1],
       [2, 3, 1, 0]], dtype=int64)
np.argpartition(X,2,axis=0)
array([[2, 1, 1, 3],
       [0, 3, 3, 2],
       [1, 0, 2, 0],
       [3, 2, 0, 1]], dtype=int64)

Fancy indexing

x=np.arange(16)
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
x[3]
3
x[3:9]
array([3, 4, 5, 6, 7, 8])
x[3:9:2]
array([3, 5, 7])
[x[3],x[5],x[8]]
[3, 5, 8]
index=[3,5,8]
x[index]
array([3, 5, 8])
index=np.array([[0,2],
                [1,3]])
x[index]
array([[0, 2],
       [1, 3]])
X=x.reshape(4,-1)
X
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
row=np.array([0,1,2])
col=np.array([1,2,3])
X[row,col]
array([ 1,  6, 11])
X[0,col]
array([1, 2, 3])
X[:2,col]
array([[1, 2, 3],
       [5, 6, 7]])
col=[True,False,True,True]

(0,2,3)

X[1:3,col]
array([[ 4,  6,  7],
       [ 8, 10, 11]])

numpy.array()中的比较

x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
x<3
array([ True,  True,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False])
X>3
array([[False, False, False, False],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])
x<=3
array([ True,  True,  True,  True, False, False, False, False, False,
       False, False, False, False, False, False, False])
x==3
array([False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False])
x!=3
array([ True,  True,  True, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True])
2*x==24-4*x
array([False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False])
X
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
X<6
array([[ True,  True,  True,  True],
       [ True,  True, False, False],
       [False, False, False, False],
       [False, False, False, False]])
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
sum(x<=3)
4
np.count_nonzero(x<=3)
4

np.any()有一个满足条件即为True,np.all()所有条件都满足才为True

np.any(x==0)
True
np.any(x<0)
False
np.all(x>=0)
True
np.all(x>0)
False
X
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
np.sum(X%2==0)
8
np.sum(X%2==0,axis=1)
array([2, 2, 2, 2])
np.sum(X%2==0,axis=0)
array([4, 0, 4, 0])
np.all(X>0,axis=1)
array([False,  True,  True,  True])
x
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
np.sum((x>3)&(x<8))
4
np.sum((x%2==0)|(x>10))
11
np.sum(~(x==0))
15
x[x<5]
array([0, 1, 2, 3, 4])
x[x%2==0]
array([ 0,  2,  4,  6,  8, 10, 12, 14])
X[X[:,3]%3==0,:]
array([[ 0,  1,  2,  3],
       [12, 13, 14, 15]])

matplotlib

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
x=np.linspace(0,10,100)
x
array([ 0.        ,  0.1010101 ,  0.2020202 ,  0.3030303 ,  0.4040404 ,
        0.50505051,  0.60606061,  0.70707071,  0.80808081,  0.90909091,
        1.01010101,  1.11111111,  1.21212121,  1.31313131,  1.41414141,
        1.51515152,  1.61616162,  1.71717172,  1.81818182,  1.91919192,
        2.02020202,  2.12121212,  2.22222222,  2.32323232,  2.42424242,
        2.52525253,  2.62626263,  2.72727273,  2.82828283,  2.92929293,
        3.03030303,  3.13131313,  3.23232323,  3.33333333,  3.43434343,
        3.53535354,  3.63636364,  3.73737374,  3.83838384,  3.93939394,
        4.04040404,  4.14141414,  4.24242424,  4.34343434,  4.44444444,
        4.54545455,  4.64646465,  4.74747475,  4.84848485,  4.94949495,
        5.05050505,  5.15151515,  5.25252525,  5.35353535,  5.45454545,
        5.55555556,  5.65656566,  5.75757576,  5.85858586,  5.95959596,
        6.06060606,  6.16161616,  6.26262626,  6.36363636,  6.46464646,
        6.56565657,  6.66666667,  6.76767677,  6.86868687,  6.96969697,
        7.07070707,  7.17171717,  7.27272727,  7.37373737,  7.47474747,
        7.57575758,  7.67676768,  7.77777778,  7.87878788,  7.97979798,
        8.08080808,  8.18181818,  8.28282828,  8.38383838,  8.48484848,
        8.58585859,  8.68686869,  8.78787879,  8.88888889,  8.98989899,
        9.09090909,  9.19191919,  9.29292929,  9.39393939,  9.49494949,
        9.5959596 ,  9.6969697 ,  9.7979798 ,  9.8989899 , 10.        ])
y=np.sin(x)
y
array([ 0.        ,  0.10083842,  0.20064886,  0.2984138 ,  0.39313661,
        0.48385164,  0.56963411,  0.64960951,  0.72296256,  0.78894546,
        0.84688556,  0.8961922 ,  0.93636273,  0.96698762,  0.98775469,
        0.99845223,  0.99897117,  0.98930624,  0.96955595,  0.93992165,
        0.90070545,  0.85230712,  0.79522006,  0.73002623,  0.65739025,
        0.57805259,  0.49282204,  0.40256749,  0.30820902,  0.21070855,
        0.11106004,  0.01027934, -0.09060615, -0.19056796, -0.28858706,
       -0.38366419, -0.47483011, -0.56115544, -0.64176014, -0.7158225 ,
       -0.7825875 , -0.84137452, -0.89158426, -0.93270486, -0.96431712,
       -0.98609877, -0.99782778, -0.99938456, -0.99075324, -0.97202182,
       -0.94338126, -0.90512352, -0.85763861, -0.80141062, -0.73701276,
       -0.66510151, -0.58640998, -0.50174037, -0.41195583, -0.31797166,
       -0.22074597, -0.12126992, -0.0205576 ,  0.0803643 ,  0.18046693,
        0.27872982,  0.37415123,  0.46575841,  0.55261747,  0.63384295,
        0.7086068 ,  0.77614685,  0.83577457,  0.8868821 ,  0.92894843,
        0.96154471,  0.98433866,  0.99709789,  0.99969234,  0.99209556,
        0.97438499,  0.94674118,  0.90944594,  0.86287948,  0.8075165 ,
        0.74392141,  0.6727425 ,  0.59470541,  0.51060568,  0.42130064,
        0.32770071,  0.23076008,  0.13146699,  0.03083368, -0.07011396,
       -0.17034683, -0.26884313, -0.36459873, -0.45663749, -0.54402111])
plt.plot(x,y)
[<matplotlib.lines.Line2D at 0x1ba3653b088>]

在这里插入图片描述

siny=y.copy()
cosy=np.cos(x)
plt.plot(x,siny)
plt.plot(x,cosy)
[<matplotlib.lines.Line2D at 0x1ba39241588>]

在这里插入图片描述

plt.plot(x,cosy,color='red')
plt.plot(x,siny)
[<matplotlib.lines.Line2D at 0x1ba392aacc8>]

在这里插入图片描述

plt.plot(x,cosy,color='red')
plt.plot(x,siny)
plt.xlim(-5,15)
plt.ylim(0,1.5)
plt.title("hhhhh")
plt.xlable("x")
plt.ylable("y")
---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)

<ipython-input-81-9859a6e52146> in <module>
      4 plt.ylim(0,1.5)
      5 plt.title("hhhhh")
----> 6 plt.xlable("x")
      7 plt.ylable("y")


AttributeError: module 'matplotlib.pyplot' has no attribute 'xlable'

在这里插入图片描述

scatter plot

plt.scatter(x,siny)
<matplotlib.collections.PathCollection at 0x1ba393c43c8>

在这里插入图片描述

plt.scatter(x,siny)
plt.scatter(x,cosy,color="red")
<matplotlib.collections.PathCollection at 0x1ba2bdc8048>

在这里插入图片描述

x=np.random.normal(0,1,10000)
y=np.random.normal(0,1,10000)
plt.scatter(x,y,alpha=0.4)
<matplotlib.collections.PathCollection at 0x1ba3bcc59c8>

在这里插入图片描述

读取数据和简单的数据探索

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn import datasets
iris=datasets.load_iris()
iris.keys()
dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])
print(iris.DESCR)
.. _iris_dataset:

Iris plants dataset
--------------------

**Data Set Characteristics:**

    :Number of Instances: 150 (50 in each of three classes)
    :Number of Attributes: 4 numeric, predictive attributes and the class
    :Attribute Information:
        - sepal length in cm
        - sepal width in cm
        - petal length in cm
        - petal width in cm
        - class:
                - Iris-Setosa
                - Iris-Versicolour
                - Iris-Virginica
                
    :Summary Statistics:

    ============== ==== ==== ======= ===== ====================
                    Min  Max   Mean    SD   Class Correlation
    ============== ==== ==== ======= ===== ====================
    sepal length:   4.3  7.9   5.84   0.83    0.7826
    sepal width:    2.0  4.4   3.05   0.43   -0.4194
    petal length:   1.0  6.9   3.76   1.76    0.9490  (high!)
    petal width:    0.1  2.5   1.20   0.76    0.9565  (high!)
    ============== ==== ==== ======= ===== ====================

    :Missing Attribute Values: None
    :Class Distribution: 33.3% for each of 3 classes.
    :Creator: R.A. Fisher
    :Donor: Michael Marshall (MARSHALL%[email protected])
    :Date: July, 1988

The famous Iris database, first used by Sir R.A. Fisher. The dataset is taken
from Fisher's paper. Note that it's the same as in R, but not as in the UCI
Machine Learning Repository, which has two wrong data points.

This is perhaps the best known database to be found in the
pattern recognition literature.  Fisher's paper is a classic in the field and
is referenced frequently to this day.  (See Duda & Hart, for example.)  The
data set contains 3 classes of 50 instances each, where each class refers to a
type of iris plant.  One class is linearly separable from the other 2; the
latter are NOT linearly separable from each other.

.. topic:: References

   - Fisher, R.A. "The use of multiple measurements in taxonomic problems"
     Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to
     Mathematical Statistics" (John Wiley, NY, 1950).
   - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis.
     (Q327.D83) John Wiley & Sons.  ISBN 0-471-22361-1.  See page 218.
   - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System
     Structure and Classification Rule for Recognition in Partially Exposed
     Environments".  IEEE Transactions on Pattern Analysis and Machine
     Intelligence, Vol. PAMI-2, No. 1, 67-71.
   - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule".  IEEE Transactions
     on Information Theory, May 1972, 431-433.
   - See also: 1988 MLC Proceedings, 54-64.  Cheeseman et al"s AUTOCLASS II
     conceptual clustering system finds 3 classes in the data.
   - Many, many more ...
iris.data
array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.2],
       [5. , 3.2, 1.2, 0.2],
       [5.5, 3.5, 1.3, 0.2],
       [4.9, 3.6, 1.4, 0.1],
       [4.4, 3. , 1.3, 0.2],
       [5.1, 3.4, 1.5, 0.2],
       [5. , 3.5, 1.3, 0.3],
       [4.5, 2.3, 1.3, 0.3],
       [4.4, 3.2, 1.3, 0.2],
       [5. , 3.5, 1.6, 0.6],
       [5.1, 3.8, 1.9, 0.4],
       [4.8, 3. , 1.4, 0.3],
       [5.1, 3.8, 1.6, 0.2],
       [4.6, 3.2, 1.4, 0.2],
       [5.3, 3.7, 1.5, 0.2],
       [5. , 3.3, 1.4, 0.2],
       [7. , 3.2, 4.7, 1.4],
       [6.4, 3.2, 4.5, 1.5],
       [6.9, 3.1, 4.9, 1.5],
       [5.5, 2.3, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [5.7, 2.8, 4.5, 1.3],
       [6.3, 3.3, 4.7, 1.6],
       [4.9, 2.4, 3.3, 1. ],
       [6.6, 2.9, 4.6, 1.3],
       [5.2, 2.7, 3.9, 1.4],
       [5. , 2. , 3.5, 1. ],
       [5.9, 3. , 4.2, 1.5],
       [6. , 2.2, 4. , 1. ],
       [6.1, 2.9, 4.7, 1.4],
       [5.6, 2.9, 3.6, 1.3],
       [6.7, 3.1, 4.4, 1.4],
       [5.6, 3. , 4.5, 1.5],
       [5.8, 2.7, 4.1, 1. ],
       [6.2, 2.2, 4.5, 1.5],
       [5.6, 2.5, 3.9, 1.1],
       [5.9, 3.2, 4.8, 1.8],
       [6.1, 2.8, 4. , 1.3],
       [6.3, 2.5, 4.9, 1.5],
       [6.1, 2.8, 4.7, 1.2],
       [6.4, 2.9, 4.3, 1.3],
       [6.6, 3. , 4.4, 1.4],
       [6.8, 2.8, 4.8, 1.4],
       [6.7, 3. , 5. , 1.7],
       [6. , 2.9, 4.5, 1.5],
       [5.7, 2.6, 3.5, 1. ],
       [5.5, 2.4, 3.8, 1.1],
       [5.5, 2.4, 3.7, 1. ],
       [5.8, 2.7, 3.9, 1.2],
       [6. , 2.7, 5.1, 1.6],
       [5.4, 3. , 4.5, 1.5],
       [6. , 3.4, 4.5, 1.6],
       [6.7, 3.1, 4.7, 1.5],
       [6.3, 2.3, 4.4, 1.3],
       [5.6, 3. , 4.1, 1.3],
       [5.5, 2.5, 4. , 1.3],
       [5.5, 2.6, 4.4, 1.2],
       [6.1, 3. , 4.6, 1.4],
       [5.8, 2.6, 4. , 1.2],
       [5. , 2.3, 3.3, 1. ],
       [5.6, 2.7, 4.2, 1.3],
       [5.7, 3. , 4.2, 1.2],
       [5.7, 2.9, 4.2, 1.3],
       [6.2, 2.9, 4.3, 1.3],
       [5.1, 2.5, 3. , 1.1],
       [5.7, 2.8, 4.1, 1.3],
       [6.3, 3.3, 6. , 2.5],
       [5.8, 2.7, 5.1, 1.9],
       [7.1, 3. , 5.9, 2.1],
       [6.3, 2.9, 5.6, 1.8],
       [6.5, 3. , 5.8, 2.2],
       [7.6, 3. , 6.6, 2.1],
       [4.9, 2.5, 4.5, 1.7],
       [7.3, 2.9, 6.3, 1.8],
       [6.7, 2.5, 5.8, 1.8],
       [7.2, 3.6, 6.1, 2.5],
       [6.5, 3.2, 5.1, 2. ],
       [6.4, 2.7, 5.3, 1.9],
       [6.8, 3. , 5.5, 2.1],
       [5.7, 2.5, 5. , 2. ],
       [5.8, 2.8, 5.1, 2.4],
       [6.4, 3.2, 5.3, 2.3],
       [6.5, 3. , 5.5, 1.8],
       [7.7, 3.8, 6.7, 2.2],
       [7.7, 2.6, 6.9, 2.3],
       [6. , 2.2, 5. , 1.5],
       [6.9, 3.2, 5.7, 2.3],
       [5.6, 2.8, 4.9, 2. ],
       [7.7, 2.8, 6.7, 2. ],
       [6.3, 2.7, 4.9, 1.8],
       [6.7, 3.3, 5.7, 2.1],
       [7.2, 3.2, 6. , 1.8],
       [6.2, 2.8, 4.8, 1.8],
       [6.1, 3. , 4.9, 1.8],
       [6.4, 2.8, 5.6, 2.1],
       [7.2, 3. , 5.8, 1.6],
       [7.4, 2.8, 6.1, 1.9],
       [7.9, 3.8, 6.4, 2. ],
       [6.4, 2.8, 5.6, 2.2],
       [6.3, 2.8, 5.1, 1.5],
       [6.1, 2.6, 5.6, 1.4],
       [7.7, 3. , 6.1, 2.3],
       [6.3, 3.4, 5.6, 2.4],
       [6.4, 3.1, 5.5, 1.8],
       [6. , 3. , 4.8, 1.8],
       [6.9, 3.1, 5.4, 2.1],
       [6.7, 3.1, 5.6, 2.4],
       [6.9, 3.1, 5.1, 2.3],
       [5.8, 2.7, 5.1, 1.9],
       [6.8, 3.2, 5.9, 2.3],
       [6.7, 3.3, 5.7, 2.5],
       [6.7, 3. , 5.2, 2.3],
       [6.3, 2.5, 5. , 1.9],
       [6.5, 3. , 5.2, 2. ],
       [6.2, 3.4, 5.4, 2.3],
       [5.9, 3. , 5.1, 1.8]])
iris.data.shape
(150, 4)
iris.feature_names
['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']
iris.target
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
iris.target.shape
(150,)
iris.target_names
array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
X=iris.data[:,:2]
X.shape
(150, 2)
plt.scatter(X[:,0],X[:,1])
<matplotlib.collections.PathCollection at 0x1ba3e7f2dc8>

在这里插入图片描述

y=iris.target
plt.scatter(X[y==0,0],X[y==0,1],color="red",marker="o")
plt.scatter(X[y==1,0],X[y==1,1],color="blue",marker="+")
plt.scatter(X[y==2,0],X[y==2,1],color="green",marker="x")
<matplotlib.collections.PathCollection at 0x1ba3e701dc8>

在这里插入图片描述

X=iris.data[:,2:]
y=iris.target
plt.scatter(X[y==0,0],X[y==0,1],color="red",marker="o")
plt.scatter(X[y==1,0],X[y==1,1],color="blue",marker="+")
plt.scatter(X[y==2,0],X[y==2,1],color="green",marker="x")
<matplotlib.collections.PathCollection at 0x1ba3e79ed08>

在这里插入图片描述

扫描二维码关注公众号,回复: 11628377 查看本文章

猜你喜欢

转载自blog.csdn.net/chairon/article/details/107040837