numpy3

NumPy Introduction 3

broadcast

1
2
3
4
5
6
7
8
import numpy as np  
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b
#Out: array([5, 6, 7])

a + 5
#Out: array([5, 6, 7])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
M = np.ones((3, 3))  
M
'''
Out: array([[ 1., 1., 1.],
[ 1., 1., 1.],
[ 1., 1., 1.]])
'''

M + a
'''
Out: array([[ 1., 2., 3.], change a to[[0,1,2],
[ 1., 2., 3.], [0,1,2],
[ 1., 2., 3.]]) [0,1,2]]
'''
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
 a = np.arange(3) 
b = np.arange(3)[:, np.newaxis] #change to the other direction
print(a)
print(b)
''''
out:
[0 1 2] #--a
[[0] #--b
[1]
[2]]
'''

a + b
'''
out:
array([[0, 1, 2],
[1, 2, 3],
[2, 3, 4]])
'''

U50DZ6.png

rule of broadcast

1
2
M = np.ones((2, 3)) 
a = np.arange(3)

From the shapes we can see that:

M.shape=(2,3) ; a.shape=(3,)

To add these two, first pad a's shape on the left with a dimension of size 1: a.shape=(1,3)

Then stretch that dimension to match M: a.shape=(2,3)

e.g.

1
2
3
4
5
6
7
8
a = np.arange(3).reshape((3, 1)) 
b = np.arange(3)
a + b
'''
Out: array([[0, 1, 2],
[1, 2, 3],
[2, 3, 4]])
'''

Sometimes broadcasting does not succeed:

1
2
3
4
5
6
7
8
M = np.ones((3, 2)) 
a = np.arange(3)
M + a

---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-13-9e16e9f98da6> in <module>()
----> 1 M + a

ValueError: operands could not be broadcast together with shapes (3,2) (3,)

After applying the broadcasting rules the shapes still do not match, so the addition fails:

M.shape -> (3, 2)

a.shape -> (3, 3)

Broadcasting only pads shapes on the left. If you want the extra dimension on the right side of the shape, reshape the array explicitly:

1
2
3
4
5
6
7
M + a[:, np.newaxis] #a's shape now is(3,1)

'''
Out: array([[ 1., 1.],
[ 2., 2.],
[ 3., 3.]])
'''

These rules apply to any binary ufunc, not just addition:

1
2
3
4
5
6
np.logaddexp(M, a[:, np.newaxis]) 
'''
Out: array([[ 1.31326169, 1.31326169],
[ 1.69314718, 1.69314718],
[ 2.31326169, 2.31326169]])
'''

np.logaddexp(a, b) computes log(exp(a) + exp(b)), but more accurately than evaluating that expression directly.

dealing with numbers

1
2
3
4
X = np.random.random((10, 3))
Xmean = X.mean(0)
Xmean
#out:array([ 0.53514715, 0.66567217, 0.44385899])

Check that the column means of the centered array are close to 0:

1
2
3
X_centered = X - Xmean
X_centered.mean(0)
#Out: array([ 2.22044605e-17, -7.77156117e-17, -1.66533454e-17])

plot the picture:

1
2
3
4
5
6
7
8
x=np.linspace(0,5,50)
y=np.linspace(0,5,50)[:,np.newaxis]
z=np.sin(x)**10+np.cos(10+y*x)*np.cos(x)

%matplotlib inline
import matplotlib.pyplot as plt
plt.imshow(z, origin='lower', extent=[0, 5, 0, 5], cmap='viridis')
plt.colorbar()

U5sY01.png

mask

e.g. record the raining days

1
2
3
4
5
6
7
8
9
10
11
import numpy as np
import pandas as pd
rainfall=pd.read_csv('Seattle2014.csv')['PRCP'].values
inches=rainfall/254
inches.shape
#out:(365,)

%matplotlib inline
import matplotlib.pyplot as plt
import seaborn; seaborn.set() # change the style
plt.hist(inches, 40);

U5cFSO.png

compare the value

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
x = np.array([1, 2, 3, 4, 5]) 
x < 3 # less than
#Out: array([ True, True, False, False, False], dtype=bool)

x > 3 # greater than
#Out: array([False, False, False, True, True], dtype=bool)

x <= 3 # less than or equal to
#Out: array([ True, True, True, False, False], dtype=bool)

x >= 3 # greater than or equal to
#Out: array([False, False, True, True, True], dtype=bool)

x != 3 # not equal to
#Out: array([ True, True, False, True, True], dtype=bool)

x == 3 # equal to
#Out: array([False, False, True, False, False], dtype=bool)

(2 * x) == (x ** 2)
#Out: array([False, True, False, False, False], dtype=bool)

Boolean

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
print(x)
'''
out:
[[5 0 3 3]
[7 9 3 5]
[2 4 7 6]]
'''

#the number of value less than 6
np.count_nonzero(x < 6)
#Out: 8

#np.sum gives the same count, because True is treated as 1 and False as 0
np.sum(x < 6)
#Out: 8

#count the number of value less than 6 in every line
np.sum(x < 6, axis=1)
#Out: array([4, 2, 2])

#is there any value larger than 8
np.any(x > 8)
#Out: True

#is there any value smaller than 0
np.any(x < 0)
#Out: False

#are all the value smaller than 10
np.all(x < 10)
#Out: True

#are all the number 6
np.all(x == 6)
#Out: False

#np.all() and np.any() can also be used like this:
#are all the values in each row less than 8
np.all(x < 8, axis=1)
#Out: array([ True, False, True], dtype=bool)

Boolean symbols can also be used:

1
2
3
4
5
6
np.sum((inches > 0.5) & (inches < 1)) #count the date of raining within the standard
#Out: 29

#can be also written like:
np.sum(~( (inches <= 0.5) | (inches >= 1) ))
#Out: 29
Each operator has an equivalent ufunc:

& -> np.bitwise_and
| -> np.bitwise_or
^ -> np.bitwise_xor
~ -> np.bitwise_not
1
2
3
4
5
6
7
8
9
10
11
# Summary statistics obtained by summing boolean masks (each True counts as 1).
print("Number days without rain: ", np.sum(inches == 0)) 
print("Number days with rain: ", np.sum(inches != 0))
print("Days with more than 0.5 inches:", np.sum(inches > 0.5))
# The label states the same 0.2-inch upper bound that the mask uses.
print("Rainy days with < 0.2 inches :", np.sum((inches > 0) & (inches < 0.2)))
'''
out:
Number days without rain: 215
Number days with rain: 150
Days with more than 0.5 inches: 37
Rainy days with < 0.2 inches : 75
'''

use Boolean as mask

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
x 
'''
Out: array([[5, 0, 3, 3],
[7, 9, 3, 5],
[2, 4, 7, 6]])
'''

x < 5
'''
Out: array([[False, True, True, True],
[False, False, True, False],
[ True, True, False, False]], dtype=bool)
'''

x[x < 5]
#Out: array([0, 3, 3, 3, 2, 4])

easier way to get index

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import numpy as np 
rand = np.random.RandomState(42)
x = rand.randint(100, size=10)
print(x)
#out:[51 92 14 71 60 20 82 86 74 74]


[x[3], x[7], x[2]]
#Out: [71, 86, 14]


ind = [3, 7, 4]
x[ind]
#Out: array([71, 86, 60])


ind = np.array([[3, 7],
[4, 5]])
x[ind]
#Out: array([[71, 86],
# [60, 20]])


X = np.arange(12).reshape((3, 4))
X
#Out: array([[ 0, 1, 2, 3],
# [ 4, 5, 6, 7],
# [ 8, 9, 10, 11]])

row = np.array([0, 1, 2]) #the index
col = np.array([2, 1, 3])
X[row, col]
#Out: array([ 2, 5, 11])

X[row[:, np.newaxis], col] #form multi-dimension array
#Out: array([[ 2, 1, 3],
# [ 6, 5, 7],
# [10, 9, 11]])

#the row and column indices are paired by broadcasting, just like in this multiplication:
row[:, np.newaxis] * col
#Out: array([[0, 0, 0],
# [2, 1, 3],
# [4, 2, 6]])


X[2, [2, 0, 1]]
#Out: array([10, 8, 9])

X[1:, [2, 0, 1]]
#Out: array([[ 6, 4, 5],
# [10, 8, 9]])

#mask:
mask = np.array([1, 0, 1, 0], dtype=bool)
X[row[:, np.newaxis], mask]
#Out: array([[ 0, 2],
# [ 4, 6],
# [ 8, 10]])

change the value:

1
2
3
4
5
6
7
8
9
x = np.arange(10) 
i = np.array([2, 1, 8, 4])
x[i] = 99
print(x)
#out:[ 0 99 99 3 99 5 6 7 99 9]

x[i] -= 10 #the selected ones -10
print(x)
#out:[ 0 89 89 3 89 5 6 7 89 9]

Be careful with repeated indices: plain assignment and += do not accumulate, but np.add.at does:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
x = np.zeros(10) 
x[[0, 0]] = [4, 6]
print(x)
#out:[ 6. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

i = [2, 3, 3, 4, 4, 4]
x[i] += 1
x
#Out: array([ 6., 0., 1., 1., 1., 0., 0., 0., 0., 0.])

x = np.zeros(10)
np.add.at(x, i, 1)
print(x)
#out:[ 0. 0. 1. 2. 3. 0. 0. 0. 0. 0.]

sort the array

know more about sort: https://arya-1017.github.io/2020/07/12/《算法图解》读书笔记1/

and https://arya-1017.github.io/2020/07/13/《算法图解》读书笔记2/

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import numpy as np 
def selection_sort(x):
    """Sort ``x`` in place using selection sort and return it.

    For every position ``i`` the smallest element of the unsorted tail
    ``x[i:]`` is found with ``np.argmin`` and swapped into position ``i``.

    Parameters
    ----------
    x : 1-D array-like supporting slicing and item assignment
        The sequence to sort; it is modified in place.

    Returns
    -------
    The same object ``x``, with its elements in ascending order.
    """
    for i in range(len(x)):
        # argmin is relative to the slice x[i:], so shift it back by i.
        swap = i + np.argmin(x[i:])
        x[i], x[swap] = x[swap], x[i]
    return x

x = np.array([2, 1, 4, 3, 5])
selection_sort(x)
#Out: array([1, 2, 3, 4, 5])

def bogosort(x):
    """Sort ``x`` in place by shuffling it until it happens to be ordered.

    This is a deliberately inefficient demonstration algorithm: the array is
    randomly shuffled with ``np.random.shuffle`` until no element is greater
    than its right-hand neighbour.

    Parameters
    ----------
    x : 1-D ndarray
        The array to sort; it is modified in place.

    Returns
    -------
    The same object ``x``, with its elements in ascending order.
    """
    # Any adjacent pair that is out of order means x is not sorted yet.
    while np.any(x[:-1] > x[1:]):
        np.random.shuffle(x)
    return x
x = np.array([2, 1, 4, 3, 5])
bogosort(x)
#Out: array([1, 2, 3, 4, 5])

easier way:

1
2
3
4
5
6
7
8
9
10
11
12
x = np.array([2, 1, 4, 3, 5]) 
np.sort(x)
#Out: array([1, 2, 3, 4, 5])

#argsort returns the indices that would sort the original array
x = np.array([2, 1, 4, 3, 5])
i = np.argsort(x)
print(i)
#out:[1 0 3 2 4]

x[i]
#Out: array([1, 2, 3, 4, 5])

multi-dimension array:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
rand = np.random.RandomState(42) 
X = rand.randint(0, 10, (4, 6))
print(X)
'''
out:
[[6 3 7 4 6 9]
[2 6 7 4 3 7]
[7 2 5 4 1 7]
[5 1 4 0 9 5]]
'''

# sort each column of X
np.sort(X, axis=0)
'''
Out:
array([[2, 1, 4, 0, 1, 5],
[5, 2, 5, 4, 3, 7],
[6, 3, 7, 4, 6, 7],
[7, 6, 7, 4, 9, 9]])
'''

# sort each row of X
np.sort(X, axis=1)
'''
Out:
array([[3, 4, 6, 6, 7, 9],
[2, 3, 4, 6, 7, 7],
[1, 2, 4, 5, 7, 7],
[0, 1, 4, 5, 5, 9]])
'''

sort partially:

1
2
3
4
5
6
7
8
9
x = np.array([7, 2, 3, 1, 6, 5, 4]) 
np.partition(x, 3) # the 3 smallest values end up in the first 3 positions, in no particular order
#Out: array([2, 1, 3, 4, 6, 5, 7])

# partition each row of X so that its 2 smallest values fill the first two slots
np.partition(X, 2, axis=1)
#Out: array([[3, 4, 6, 7, 6, 9],
# [2, 3, 4, 7, 6, 7],
# [1, 2, 4, 5, 7, 7],
# [0, 1, 4, 5, 9, 5]])