NumPy Introduction 3

broadcast

import numpy as np  
a = np.array([0, 1, 2])  
b = np.array([5, 5, 5])  
a + b  
#Out: array([5, 6, 7])

a + 5 
#Out: array([5, 6, 7])

M = np.ones((3, 3))  
M  
'''
Out: array([[ 1., 1., 1.],  
			[ 1., 1., 1.],  
		    [ 1., 1., 1.]])  
'''

M + a  
'''
Out: array([[ 1., 2., 3.],      a is broadcast to [[0,1,2],
			[ 1., 2., 3.],                         [0,1,2],
			[ 1., 2., 3.]])                        [0,1,2]]
'''

 a = np.arange(3) 
 b = np.arange(3)[:, np.newaxis] #change to the other direction
 print(a) 
 print(b) 
'''
out:
[0 1 2]   #--a
[[0]      #--b
 [1] 
 [2]] 
'''

a + b 
'''
out:
array([[0, 1, 2], 
 	   [1, 2, 3], 
 	   [2, 3, 4]])
'''

rule of broadcast

# Operands for the worked broadcasting example below.
M = np.ones((2, 3))  # shape (2, 3)
a = np.arange(3)     # shape (3,)

we can know that:

M.shape=(2,3) ; a.shape=(3,)

To add these two, first pad the shape of a on the left with a dimension of size 1: a.shape=(1,3)

Then stretch that size-1 dimension so it matches M: a.shape=(2,3)

e.g.

a = np.arange(3).reshape((3, 1)) 
b = np.arange(3)
a + b 
'''
Out: array([[0, 1, 2], 
 			[1, 2, 3], 
 			[2, 3, 4]])
'''

Sometimes broadcasting fails:

M = np.ones((3, 2)) 
a = np.arange(3)
M + a 
    
--------------------------------------------------------------------------- 
ValueError Traceback (most recent call last) 
<ipython-input-13-9e16e9f98da6> in <module>() 
----> 1 M + a

ValueError: operands could not be broadcast together with shapes (3,2) (3,)

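After broadcasting, the shape of a is padded on the left to (1, 3) and then stretched to (3, 3):

M.shape -> (3, 2)

a.shape -> (3, 3)

The last dimensions (2 and 3) differ and neither of them is 1, so the arrays are incompatible and the addition raises the ValueError shown above.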

If you want the new dimension added on the right of the shape instead of the left, reshape the array explicitly:

M + a[:, np.newaxis] #a's shape now is(3,1)

'''
Out: array([[ 1., 1.], 
			[ 2., 2.], 
			[ 3., 3.]])
'''

These rules apply to any binary ufunc:

np.logaddexp(M, a[:, np.newaxis]) 
'''
Out: array([[ 1.31326169, 1.31326169], 
 			[ 1.69314718, 1.69314718], 
 			[ 2.31326169, 2.31326169]])
'''

np.logaddexp(a, b) computes log(exp(a) + exp(b)), but more accurately than evaluating that expression directly.

dealing with numbers

X = np.random.random((10, 3))
Xmean = X.mean(0) 
Xmean
#out:array([ 0.53514715, 0.66567217, 0.44385899])

Check that the column means of the centered array are close to 0:

X_centered = X - Xmean
X_centered.mean(0) 
#Out: array([ 2.22044605e-17, -7.77156117e-17, -1.66533454e-17])

plot the picture:

x=np.linspace(0,5,50)
y=np.linspace(0,5,50)[:,np.newaxis]
z=np.sin(x)**10+np.cos(10+y*x)*np.cos(x)

%matplotlib inline 
import matplotlib.pyplot as plt 
plt.imshow(z, origin='lower', extent=[0, 5, 0, 5], cmap='viridis') 
plt.colorbar()

mask

e.g. record the raining days

import numpy as np
import pandas as pd
rainfall=pd.read_csv('Seattle2014.csv')['PRCP'].values
inches=rainfall/254 
inches.shape
#out:(365,)

%matplotlib inline 
import matplotlib.pyplot as plt 
import seaborn; seaborn.set() # change the style
plt.hist(inches, 40);

compare the value

# Comparison operators are applied element-wise and return boolean arrays.
x = np.array([1, 2, 3, 4, 5]) 
x < 3 # less than
#Out: array([ True, True, False, False, False], dtype=bool) 

x > 3 # greater than
#Out: array([False, False, False, True, True], dtype=bool) 

x <= 3 # less than or equal
#Out: array([ True, True, True, False, False], dtype=bool) 

x >= 3 # greater than or equal
#Out: array([False, False, True, True, True], dtype=bool) 

x != 3 # not equal
#Out: array([ True, True, False, True, True], dtype=bool) 

x == 3 # equal
#Out: array([False, False, True, False, False], dtype=bool)

# both sides of a comparison may be compound element-wise expressions
(2 * x) == (x ** 2) 
#Out: array([False, True, False, False, False], dtype=bool)

Boolean

# Example 3x4 integer array used by the boolean reductions below.
# (Defined explicitly so the printed output matches; x was previously 1-D.)
x = np.array([[5, 0, 3, 3],
              [7, 9, 3, 5],
              [2, 4, 7, 6]])
print(x)
'''
out:
[[5 0 3 3] 
 [7 9 3 5] 
 [2 4 7 6]]
'''

# number of values less than 6
np.count_nonzero(x < 6)
#Out: 8

# the same count via np.sum: True counts as 1 and False as 0
np.sum(x < 6)
#Out: 8

# number of values less than 6 in each row
np.sum(x < 6, axis=1)
#Out: array([4, 2, 2])

# is there any value greater than 8?
np.any(x > 8)
#Out: True

# is there any value less than 0?
np.any(x < 0)
#Out: False

# are all values less than 10?
np.all(x < 10)
#Out: True

# are all values equal to 6?
np.all(x == 6)
#Out: False

# np.all() and np.any() also accept an axis argument:
# are all values in each row less than 8?
np.all(x < 8, axis=1)
#Out: array([ True, False, True], dtype=bool)

Boolean operators can also be used:

np.sum((inches > 0.5) & (inches < 1)) # count the entries of inches strictly between 0.5 and 1
#Out: 29

# equivalent form (De Morgan): negate "at most 0.5 OR at least 1"
np.sum(~( (inches <= 0.5) | (inches >= 1) )) 
#Out: 29

&	np.bitwise_and
|	np.bitwise_or
^	np.bitwise_xor
~	np.bitwise_not

# Summarise the rainfall data with boolean masks; np.sum counts the True entries.
print("Number days without rain: ", np.sum(inches == 0)) 
print("Number days with rain: ", np.sum(inches != 0)) 
print("Days with more than 0.5 inches:", np.sum(inches > 0.5)) 
# The label states the same 0.2 threshold that the mask actually applies.
print("Rainy days with < 0.2 inches :", np.sum((inches > 0) & (inches < 0.2))) 
'''
out:
Number days without rain: 215 
Number days with rain: 150 
Days with more than 0.5 inches: 37 
Rainy days with < 0.2 inches : 75
'''

use Boolean as mask

x 
'''
Out: array([[5, 0, 3, 3], 
 			[7, 9, 3, 5], 
 			[2, 4, 7, 6]])
'''

x < 5 
'''
Out: array([[False, True, True, True], 
 			[False, False, True, False], 
 			[ True, True, False, False]], dtype=bool)
'''

x[x < 5] 
#Out: array([0, 3, 3, 3, 2, 4])

easier way to get index

import numpy as np 
rand = np.random.RandomState(42) 
x = rand.randint(100, size=10) 
print(x) 
#out:[51 92 14 71 60 20 82 86 74 74]


[x[3], x[7], x[2]] 
#Out: [71, 86, 14]


ind = [3, 7, 4] 
x[ind] 
#Out: array([71, 86, 60])


ind = np.array([[3, 7], 
 				[4, 5]]) 
x[ind] 
#Out: array([[71, 86], 
# 			 [60, 20]])


X = np.arange(12).reshape((3, 4)) 
X 
#Out: array([[ 0, 1, 2, 3], 
# 			 [ 4, 5, 6, 7], 
#			 [ 8, 9, 10, 11]])

row = np.array([0, 1, 2]) #the  index
col = np.array([2, 1, 3]) 
X[row, col] 
#Out: array([ 2, 5, 11])

X[row[:, np.newaxis], col] #form multi-dimension array
#Out: array([[ 2, 1, 3], 
#			 [ 6, 5, 7], 
#			 [10, 9, 11]])

#we can see the calculation through:
row[:, np.newaxis] * col 
#Out: array([[0, 0, 0], 
#			 [2, 1, 3], 
#			 [4, 2, 6]])


X[2, [2, 0, 1]] 
#Out: array([10, 8, 9])

X[1:, [2, 0, 1]] 
#Out: array([[ 6, 4, 5], 
#			 [10, 8, 9]])

#mask:
mask = np.array([1, 0, 1, 0], dtype=bool) 
X[row[:, np.newaxis], mask] 
#Out: array([[ 0, 2], 
#			 [ 4, 6], 
#			 [ 8, 10]])

change the value:

x = np.arange(10) 
i = np.array([2, 1, 8, 4]) 
x[i] = 99 
print(x) 
#out:[ 0 99 99 3 99 5 6 7 99 9]

x[i] -= 10 #the selected ones -10
print(x) 
#out:[ 0 89 89 3 89 5 6 7 89 9]

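Repeated indices can give unexpected results: each listed position is assigned once and the last assignment wins, so += does not accumulate over repeated indices (np.add.at does):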

x = np.zeros(10) 
x[[0, 0]] = [4, 6] 
print(x) 
#out:[ 6. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

i = [2, 3, 3, 4, 4, 4] 
x[i] += 1 
x 
#Out: array([ 6., 0., 1., 1., 1., 0., 0., 0., 0., 0.])

x = np.zeros(10) 
np.add.at(x, i, 1) 
print(x) 
#out:[ 0. 0. 1. 2. 3. 0. 0. 0. 0. 0.]

sort the array

know more about sort: https://arya-1017.github.io/2020/07/12/《算法图解》读书笔记1/

and https://arya-1017.github.io/2020/07/13/《算法图解》读书笔记2/

import numpy as np 
def selection_sort(x):
    """Sort ``x`` in place in ascending order using selection sort.

    For each position ``i``, the smallest element of the remaining slice
    ``x[i:]`` is located with ``np.argmin`` and swapped into position ``i``.

    Parameters
    ----------
    x : one-dimensional mutable sequence (e.g. a NumPy array)
        Values to sort; the object is modified in place.

    Returns
    -------
    The same object ``x``, now sorted.
    """
    # Indentation is spaces only: mixing tabs and spaces inside one block
    # raises TabError in Python 3.
    for i in range(len(x)):
        # argmin is relative to the slice x[i:], so shift it back by i.
        swap = i + np.argmin(x[i:])
        x[i], x[swap] = x[swap], x[i]
    return x

x = np.array([2, 1, 4, 3, 5]) 
selection_sort(x) 
#Out: array([1, 2, 3, 4, 5])

def bogosort(x):
    """Shuffle ``x`` in place at random until it happens to be sorted.

    The array counts as sorted once no element is greater than its right
    neighbour. Returns the same object ``x``.
    """
    while True:
        # Compare every element with its successor.
        out_of_order = x[:-1] > x[1:]
        if not np.any(out_of_order):
            return x
        np.random.shuffle(x)
x = np.array([2, 1, 4, 3, 5]) 
bogosort(x) 
#Out: array([1, 2, 3, 4, 5])

easier way:

x = np.array([2, 1, 4, 3, 5]) 
np.sort(x) 
#Out: array([1, 2, 3, 4, 5])

#argsort returns the indices that would sort the original array
x = np.array([2, 1, 4, 3, 5]) 
i = np.argsort(x) 
print(i) 
#out:[1 0 3 2 4]

x[i] 
#Out: array([1, 2, 3, 4, 5])

multi-dimension array:

rand = np.random.RandomState(42) 
X = rand.randint(0, 10, (4, 6)) 
print(X) 
'''
out:
[[6 3 7 4 6 9] 
 [2 6 7 4 3 7] 
 [7 2 5 4 1 7] 
 [5 1 4 0 9 5]] 
'''

# sort each column of X
np.sort(X, axis=0) 
'''
Out: 
array([[2, 1, 4, 0, 1, 5], 
 	   [5, 2, 5, 4, 3, 7], 
	   [6, 3, 7, 4, 6, 7], 
	   [7, 6, 7, 4, 9, 9]]) 
'''

# sort each row of X
np.sort(X, axis=1) 
'''
Out: 
array([[3, 4, 6, 6, 7, 9], 
 	   [2, 3, 4, 6, 7, 7], 
	   [1, 2, 4, 5, 7, 7], 
	   [0, 1, 4, 5, 5, 9]])
'''

sort partially:

# np.partition does a partial sort: it returns a new array in which the
# k smallest values come first, with arbitrary order inside each part.
x = np.array([7, 2, 3, 1, 6, 5, 4]) 
np.partition(x, 3) # the 3 smallest values occupy the first 3 slots
#Out: array([2, 1, 3, 4, 6, 5, 7])

# along axis=1 each row is partitioned independently:
# the first 2 slots of every row hold that row's 2 smallest values
np.partition(X, 2, axis=1) 
#Out: array([[3, 4, 6, 7, 6, 9], 
#			 [2, 3, 4, 7, 6, 7], 
#			 [1, 2, 4, 5, 7, 7], 
#			 [0, 1, 4, 5, 9, 5]])