此notebook素材来自kesci.com两个练习系列.

NumPy快速上手指南 —— 基础篇

数据大小

定义

axes: 轴
rank: 秩

例如:
[[ 1., 0., 0.], [ 0., 1., 2.]] 的秩(rank)为2, 即有2个轴(axes); 第一个轴的长度为2, 第二个轴的长度为3

python

1	import numpy as np

python

1 2	a = np.arange(15).reshape(3,5) a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

查看形状(shape), 即每个轴的长度

python

a.shape

(3, 5)

查看多少个轴

python

a.ndim

查看数据类型

python

a.dtype

dtype('int32')

python

1	a.dtype.name

'int32'

每个元素的字节大小

python

1	a.itemsize

总个数

python

a.size

类型

python

type(a)

numpy.ndarray

数组创建

list创建

python

1 2	a = np.array([2,3,4]) a

array([2, 3, 4])

python

a.dtype

dtype('int32')

python

1 2	a = np.array([(1.5,2,3),(4,5,6)]) a

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

python

a.dtype

dtype('float64')

指定数据类型创建

python

1 2	a = np.array([[1,2],[3,4]], dtype=complex ) a

array([[1.+0.j, 2.+0.j],
       [3.+0.j, 4.+0.j]])

python

a.dtype

dtype('complex128')

python

1	a.itemsize

内置函数创建

通常，数组的元素开始都是未知的，但是它的大小已知。因此，Numpy提供了一些使用占位符创建数组的函数。这最小化了扩展数组的需要和高昂的运算代价。

全0数组

python

1	np.zeros((3,4),dtype=np.float64)

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

按变量shape创建

python

1 2	a = np.array([1,2,3]) a

array([1, 2, 3])

python

1	np.zeros_like(a)

array([0, 0, 0])

全1数组

python

1	np.ones((2,3,4), dtype=np.int16)

array([[[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]],

       [[1, 1, 1, 1],
        [1, 1, 1, 1],
        [1, 1, 1, 1]]], dtype=int16)

按变量shape创建

python

1 2	a = np.array([1,2,3]) a

array([1, 2, 3])

python

1	np.ones_like(a)

array([1, 1, 1])

空数组

python

1	np.empty((2,5))

array([[1.37700972e-311, 2.32210854e-322, 0.00000000e+000,
        0.00000000e+000, 0.00000000e+000],
       [1.16095484e-028, 7.52736939e+252, 2.09570572e-110,
        2.91237123e+257, 4.71294503e+257]])

单位对角矩阵

python

np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

python

1	np.identity(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

等间隔数列

arange定义间隔大小

python

1	np.arange(10,30,5)

array([10, 15, 20, 25])

python

1	np.arange(0,2,0.3)

array([0. , 0.3, 0.6, 0.9, 1.2, 1.5, 1.8])

linspace定义总共多少个点(默认包含终点)

python

1	np.linspace(0,2,5)

array([0. , 0.5, 1. , 1.5, 2. ])

随机数

[0, 1)区间上的均匀分布随机

python

1	np.random.rand(2,3)

array([[0.80722577, 0.64305952, 0.22218733],
       [0.32297689, 0.46138349, 0.18416854]])

标准正态分布随机

python

1	np.random.randn(2,3)

array([[ 2.65856467,  1.35871691, -0.13102211],
       [ 0.212424  , -0.87073905,  0.83563459]])

一般正态分布随机

python

1	np.random.normal(loc=1,scale=0.5,size=(2,3))

array([[0.52176134, 1.02691187, 1.37880997],
       [1.19657311, 1.31502553, 0.38417858]])

自定义离散总体及样本概率分布的随机

python

1 2	aa_milne_arr = ['pooh', 'rabbit', 'piglet', 'Christopher'] np.random.choice(aa_milne_arr, 5, p=[0.5, 0.1, 0.1, 0.3])

array(['pooh', 'pooh', 'pooh', 'Christopher', 'Christopher'], dtype='<U11')

整数随机

python

1	np.random.randint(low=-2,high=10,size=10)

array([0, 5, 8, 0, 9, 7, 3, 5, 2, 2])

python

a.min()

python

a.max()

python

a.sum()

python

1	a.cumsum()

array([ 0,  1,  3,  6, 10, 15], dtype=int32)

python

1	a.std(ddof=1)

1.8708286933869707

通用函数

python

1	a = np.arange(3)

计算

python

np.exp(a)

array([ 1.        ,  2.71828183,  7.3890561 ])

python

1	np.sqrt(a)

array([ 0.        ,  1.        ,  1.41421356])

python

1	c = np.array([2., -1., 4.])

python

1	np.add(a,c)

array([ 2.,  0.,  6.])

python

a+c

array([ 2.,  0.,  6.])

共轭

python

1	np.conjugate(1+2j)

(1-2j)

数组相邻两个元素的差

python

1 2	x = np.array([1, 2, 4, 7, 0]) np.diff(x)

array([ 1,  2,  3, -7])

叉积

python

1 2	a=np.array([1,2,3]) b=np.array([2,3,4])

python

1	np.cross(a,b)

array([-1,  2, -1])

元素积

python

a*b

array([ 2,  6, 12])

点积

python

1	np.dot(a,b)

内积

python

1	np.inner(a,b)

外积

python

1	np.outer(a,b)

array([[ 2,  3,  4],
       [ 4,  6,  8],
       [ 6,  9, 12]])

检查元素

是否所有元素等于true

python

1 2	a = np.array([True, False, True]) np.all(a)

False

python

# np.alltrue was deprecated in NumPy 1.25 and removed in NumPy 2.0; np.all is the supported equivalent.
np.all(a)

False

是否有至少一个元素等于true

python

np.any(a)

True

按照条件寻找坐标

np.where的理解可参考：
https://www.zhihu.com/question/62844162

1维的情况

python

1	a=np.array([1,2,3])

python

1	np.where(a>1)

(array([1, 2], dtype=int64),)

多维的情况

python

1 2	x = np.arange(9.).reshape(3, 3) x

array([[ 0.,  1.,  2.],
       [ 3.,  4.,  5.],
       [ 6.,  7.,  8.]])

python

1	np.where( x > 5.0)

(array([2, 2, 2], dtype=int64), array([0, 1, 2], dtype=int64))

返回的两个数组中，第一个是行索引，第二个是列索引

自定义函数

单数组内的函数

python

def myfunc(b):
    """Return the sum of the first two elements of b."""
    return b[0] + b[1]

python

1	a=np.array([[1,2,3],[4,5,6]])

python

1	np.apply_along_axis(myfunc,1,a)

array([3, 9])

多数组函数

python

def myfunc(a, b):
    """Combine two scalars: give a - b when a exceeds b, and a + b otherwise."""
    return a - b if a > b else a + b


# Lift the scalar function so that it is applied element by element (with broadcasting).
vfunc = np.vectorize(myfunc)
vfunc([1, 2, 3, 4], 2)

array([3, 4, 1, 2])

最大最小值

python

1	a=np.array([3,1,2,5,4,6])

python

1	np.argmax(a)

python

1	np.argmin(a)

排序

python

1	a=np.array([3,1,2,5,4,6])

python

1	np.sort(a)

array([1, 2, 3, 4, 5, 6])

python

1	np.argsort(a)

array([1, 2, 0, 4, 3, 5], dtype=int64)

python

surnames =    ('Hertz',    'Galilei', 'Hertz')
first_names = ('Heinrich', 'Galileo', 'Gustav')
ind = np.lexsort((first_names, surnames))
ind

array([1, 2, 0], dtype=int64)

组合排序

python

a = [1,5,1,4,3,4,4] # First column
b = [9,4,0,4,0,2,1] # Second column
ind = np.lexsort((b,a)) # Sort by a, then by b
print(ind)

[2 0 4 6 5 3 1]

返回插入点

python

1	np.searchsorted([1,2,3,4,5], 3)

求和求积

python

1 2	a=np.arange(1,5) a

array([1, 2, 3, 4])

累加乘积

python

1	np.cumprod(a)

array([ 1,  2,  6, 24], dtype=int32)

累加求和

python

1	np.cumsum(a)

array([ 1,  3,  6, 10], dtype=int32)

所有元素乘积

python

1	np.prod(a)

对角线求和

python

1 2	a=np.eye(3) a

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

python

1	np.trace(a)

3.0

描述统计

python

1 2	a=np.array([3,1,2,5,4,6]) a

array([3, 1, 2, 5, 4, 6])

python

np.max(a)

python

np.min(a)

python

1	np.mean(a)

3.5

python

1	np.median(a)

3.5

python

1	np.std(a, ddof=1)

1.8708286933869707

python

1	np.var(a, ddof=1)

3.5

非零数据位置

python

1	np.nonzero(a)

(array([0, 1, 2, 3, 4, 5], dtype=int64),)

整数数字统计

python

1	np.bincount(a)

array([0, 1, 1, 1, 1, 1, 1], dtype=int64)

四舍五入

向上取整

python

1	a = np.array([0.1, -0.5, 1.8])

python

1	np.ceil(a)

array([ 1., -0.,  2.])

向下取整

python

1	np.floor(a)

array([ 0., -1.,  1.])

四舍五入(注意: np.round对恰好为.5的值向最近的偶数取整, 例如0.5和-0.5都得到0)

python

1	np.round(a)

array([ 0., -0.,  2.])

限制数字范围

python

1	np.clip(a, 0, 1.5)

array([ 0.1,  0. ,  1.5])

矩阵操作

转置

python

1 2	x = np.arange(4).reshape((2,2)) x

array([[0, 1],
       [2, 3]])

python

1	np.transpose(x)

array([[0, 2],
       [1, 3]])

python

def mypolyval(p, x):
    """Evaluate a polynomial at x with Horner's scheme.

    p holds the coefficients ordered from the highest degree down to the
    constant term. Both p and the result are printed so that the way
    np.vectorize calls this function can be observed.

    Raises IndexError if p is empty.
    """
    print(p)
    coeffs = list(p)
    res = coeffs[0]
    # Single pass over the remaining coefficients instead of repeated pop(0).
    for coeff in coeffs[1:]:
        res = res*x + coeff
    print(res)
    return res


# 'p' is excluded from vectorization, so the whole coefficient list is
# handed unchanged to every call while x is iterated element by element.
vpolyval = np.vectorize(mypolyval, excluded=['p'])

python

1 2	vpolyval(p=[1, 2, 3], x=[0, 1]) np.array([3, 6])

[1, 2, 3]
3
[1, 2, 3]
3
[1, 2, 3]
6





array([3, 6])

mask

python

1	a = np.arange(5)

python

1 2	np.putmask(a, a>1, a**2) a

array([ 0,  1,  4,  9, 16])

python

1
2
3

a = np.arange(5)
np.putmask(a, a>1, [88,99,33])
a

array([ 0,  1, 33, 88, 99])

索引，切片和迭代

python

1 2	a = np.arange(10)**3 a

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729], dtype=int32)

索引和切片

等间距索引

python

# Equivalent to a[0:6:2] = -1000: from the start up to position 6 (exclusive),
# set every 2nd element to -1000.
a[:6:2] = -1000
a

array([-1000,     1, -1000,    27, -1000,   125,   216,   343,   512,   729], dtype=int32)

反向数组

python

a[ : :-1]

array([  729,   512,   343,   216,   125, -1000,    27, -1000,     1, -1000], dtype=int32)

多维数组索引

python

def f(x, y):
    """Encode an index pair as 10*x + y."""
    tens = 10 * x
    return tens + y


# Build a 5x4 integer array whose entry at (i, j) is f(i, j).
b = np.fromfunction(f, (5, 4), dtype=int)
b

array([[ 0,  1,  2,  3],
       [10, 11, 12, 13],
       [20, 21, 22, 23],
       [30, 31, 32, 33],
       [40, 41, 42, 43]])

python

b[2,3]

python

1	b[0:5, 1] # each row in the second column of b

array([ 1, 11, 21, 31, 41])

python

1	b[ : ,1] # equivalent to the previous example

array([ 1, 11, 21, 31, 41])

python

1	b[1:3, : ] # each column in the second and third row of b

array([[10, 11, 12, 13],
       [20, 21, 22, 23]])

python

1	b[-1] # the last row. Equivalent to b[-1,:]

array([40, 41, 42, 43])

python

1	b[-1,...] # the last row. Equivalent to b[-1,:]

array([40, 41, 42, 43])

高维切片

python

1 2	a = np.array([[1, 2], [3, 4], [5, 6]]) a

array([[1, 2],
       [3, 4],
       [5, 6]])

python

1	np.compress([0, 1], a, axis=0)

array([[3, 4]])

迭代

python

def f(x, y):
    """Return y offset by ten times x."""
    return y + x * 10


# Evaluate f over every (row, column) index of a 5x4 grid of integers.
grid_shape = (5, 4)
b = np.fromfunction(f, grid_shape, dtype=int)
b

array([[ 0,  1,  2,  3],
       [10, 11, 12, 13],
       [20, 21, 22, 23],
       [30, 31, 32, 33],
       [40, 41, 42, 43]])

第一个轴迭代

python

1 2	for row in b: print (row)

[0 1 2 3]
[10 11 12 13]
[20 21 22 23]
[30 31 32 33]
[40 41 42 43]

每一个元素迭代

python

1 2	for element in b.flat: print (element,end=",")

0,1,2,3,10,11,12,13,20,21,22,23,30,31,32,33,40,41,42,43,

enumerate 迭代

python

1 2	for index, x in np.ndenumerate(b): print(index, x)

(0, 0) 0
(0, 1) 1
(0, 2) 2
(0, 3) 3
(1, 0) 10
(1, 1) 11
(1, 2) 12
(1, 3) 13
(2, 0) 20
(2, 1) 21
(2, 2) 22
(2, 3) 23
(3, 0) 30
(3, 1) 31
(3, 2) 32
(3, 3) 33
(4, 0) 40
(4, 1) 41
(4, 2) 42
(4, 3) 43

增加轴

python

1	np.newaxis == None

True

python

1 2	x = np.arange(3) x

array([0, 1, 2])

python

1	x[:, np.newaxis]

array([[0],
       [1],
       [2]])

python

1	x[:, None]

array([[0],
       [1],
       [2]])

查看索引

很方便的函数，可以查看一个指定shape的数组的每一个元素的index

python

1	grid = np.indices((2, 3))

python

grid

array([[[0, 0, 0],
        [1, 1, 1]],

       [[0, 1, 2],
        [0, 1, 2]]])

形状操作

python

1 2	a = np.floor(10*np.random.random((3,4))) a

array([[ 5.,  3.,  6.,  7.],
       [ 5.,  5.,  2.,  8.],
       [ 1.,  4.,  7.,  6.]])

python

a.shape

(3, 4)

平坦化

由ravel()展平的数组元素的顺序通常是C风格的，就是说，最右边的索引变化得最快，所以元素a[0,0]之后是a[0,1]。如果数组被改变形状(reshape)成其它形状，数组仍然是C风格的。NumPy通常创建一个以这个顺序保存数据的数组，所以ravel()通常不需要复制它的参数。但是如果数组是通过对其它数组切片得到的，或者是用不寻常的选项创建的，它可能需要被复制。函数reshape()和ravel()还可以通过可选参数指定使用FORTRAN风格的顺序，即最左边的索引变化最快。

ravel可以按不同方式展开，可选

python

a.ravel()

array([ 5.,  3.,  6.,  7.,  5.,  5.,  2.,  8.,  1.,  4.,  7.,  6.])

flatten同样可以通过order参数选择展开方式(默认按行)，区别在于flatten总是返回副本(copy)，而ravel尽可能返回视图(view)

python

1	a.flatten()

array([ 5.,  3.,  6.,  7.,  5.,  5.,  2.,  8.,  1.,  4.,  7.,  6.])

a.flat 是一个iterator

python

1 2	for b in a.flat: print(b)

5.0
3.0
6.0
7.0
5.0
5.0
2.0
8.0
1.0
4.0
7.0
6.0

np.reshape也可以完成同样的任务

python

1	np.reshape(a, (1, 12)).squeeze()

array([ 5.,  3.,  6.,  7.,  5.,  5.,  2.,  8.,  1.,  4.,  7.,  6.])

转置

python

1	a.transpose()

array([[ 5.,  5.,  1.],
       [ 3.,  5.,  4.],
       [ 6.,  2.,  7.],
       [ 7.,  8.,  6.]])

更改形状

reshape和resize功能相同，但reshape返回新形状的数组而不改变数组自身，resize则原地修改数组自身(没有返回值)

python

1	a.reshape(2,6)

array([[ 5.,  3.,  6.,  7.,  5.,  5.],
       [ 2.,  8.,  1.,  4.,  7.,  6.]])

python

array([[ 5.,  3.,  6.,  7.],
       [ 5.,  5.,  2.,  8.],
       [ 1.,  4.,  7.,  6.]])

python

1	a.resize(2,6)

python

array([[ 5.,  3.,  6.,  7.,  5.,  5.],
       [ 2.,  8.,  1.,  4.,  7.,  6.]])

数组组合(stack)

python

1 2	a = np.floor(10*np.random.random((2,2))) a

array([[ 2.,  5.],
       [ 5.,  8.]])

python

1 2	b = np.floor(10*np.random.random((2,2))) b

array([[ 5.,  2.],
       [ 0.,  0.]])

组合

纵向组合 vertical stack

python

1	np.vstack((a,b))

array([[ 2.,  5.],
       [ 5.,  8.],
       [ 5.,  2.],
       [ 0.,  0.]])

横向组合 horizontal stack

python

1	np.hstack((a,b))

array([[ 2.,  5.,  5.,  2.],
       [ 5.,  8.,  0.,  0.]])

np.column_stack把一维数组作为列组合成二维数组；对二维数组则等价于hstack(如下例所示)

python

1	np.column_stack((a,b))

array([[ 2.,  5.,  5.,  2.],
       [ 5.,  8.,  0.,  0.]])

分割

python

1 2	a = np.floor(10*np.random.random((2,12))) a

array([[ 4.,  0.,  7.,  0.,  8.,  3.,  5.,  7.,  9.,  9.,  9.,  7.],
       [ 8.,  1.,  0.,  0.,  7.,  4.,  5.,  8.,  5.,  6.,  9.,  7.]])

横向分割 horizontal split (沿列方向, 即axis=1切分)

python

1	np.hsplit(a ,3)

[array([[ 4.,  0.,  7.,  0.],
        [ 8.,  1.,  0.,  0.]]), array([[ 8.,  3.,  5.,  7.],
        [ 7.,  4.,  5.,  8.]]), array([[ 9.,  9.,  9.,  7.],
        [ 5.,  6.,  9.,  7.]])]

python

1	np.hsplit(a, (3,4))

[array([[ 4.,  0.,  7.],
        [ 8.,  1.,  0.]]), array([[ 0.],
        [ 0.]]), array([[ 8.,  3.,  5.,  7.,  9.,  9.,  9.,  7.],
        [ 7.,  4.,  5.,  8.,  5.,  6.,  9.,  7.]])]

任意轴分割

python

1	np.array_split(a, 2, axis=0)

[array([[ 4.,  0.,  7.,  0.,  8.,  3.,  5.,  7.,  9.,  9.,  9.,  7.]]),
 array([[ 8.,  1.,  0.,  0.,  7.,  4.,  5.,  8.,  5.,  6.,  9.,  7.]])]

复制

等号是相同数组

python

1
2
3

a = np.arange(12)
b = a            # no new object is created
b is a

True

python

b.shape = 3,4    # changes the shape of a
a.shape

(3, 4)

函数调用是相同数组

python

def f(x):
    """Print id(x) so it can be compared with the caller's id of the same object."""
    print(id(x))

python

id(a)

1997431125376

python

f(a)

1997431125376

浅复制(视图 view): 创建一个新的数组对象, 但与原数组共享同一份数据(不复制数据), 形状等属性各自独立

python

1	c = a.view()

python

c is a

False

python

1	c.base is a

True

python

1	c.flags.owndata

False

python

c.shape = 2,6    # a's shape doesn't change
a.shape

(3, 4)

python

c[0,4] = 1234    # a's data changes
a

array([[   0,    1,    2,    3],
       [1234,    5,    6,    7],
       [   8,    9,   10,   11]])

python

1
2
3

s = a[ : , 1:3]     # spaces added for clarity; could also be written "s = a[:,1:3]"
s[:] = 10           # s[:] is a view of s. Note the difference between s=10 and s[:]=10
a

array([[   0,   10,   10,    3],
       [1234,   10,   10,    7],
       [   8,   10,   10,   11]])

深复制

python

1 2	d=a.copy() d is a

False

python

1	d.base is a

False

python

1 2	d[0,0] = 9999 a

array([[   0,   10,   10,    3],
       [1234,   10,   10,    7],
       [   8,   10,   10,   11]])

数据大小

定义

查看维度(dimension)

查看多少个轴

查看数据类型

每个元素的字节大小

总个数

类型

数组创建

list创建

内置函数创建

全0数组

全1数组

空数组

单位对角矩阵

等间隔数列

随机数

通用函数

计算

检查元素

自定义函数

最大最小值

排序

求和求积

描述统计

四舍五入

相关性

矩阵操作

mask

索引，切片和迭代

索引和切片

迭代

增加轴

查看索引

形状操作

平坦化

转置

更改形状

组合

分割

复制