Neural-Network-based Iris Flower Classification

0 Dataset Description

Description Source: https://archive.ics.uci.edu/ml/datasets/Iris/

Data Set Information:
This is perhaps the best known database to be found in the pattern recognition literature. Fisher’s paper is a classic in the field and is referenced frequently to this day. (See Duda & Hart, for example.) The data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other.
Predicted attribute: class of iris plant.
This is an exceedingly simple domain.
Attribute Information:
column 1: sepal length in cm
column 2: sepal width in cm
column 3: petal length in cm
column 4: petal width in cm
column 5: class:
– Iris Setosa
– Iris Versicolour
– Iris Virginica

1 Observe the data

Load dataset from csv file.

%matplotlib inline
import matplotlib.pyplot as plt
import csv
import numpy as np
import random
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Load the iris measurements from the CSV file.
# Each row is expected to hold four numeric columns (sepal length, sepal
# width, petal length, petal width) followed by the class label; no header
# row is expected, since the first four fields are parsed with float().
# Values are collected in plain Python lists and converted to NumPy arrays
# once at the end, instead of re-allocating an array on every np.append.
sepal_length = []
sepal_width = []
petal_length = []
petal_width = []
iris_class = []

# The context manager guarantees the file is closed even if a row fails to
# parse; newline='' is what the csv module recommends for files it reads.
with open('./irisDataset.csv', 'r', newline='') as csv_file:
    reader = csv.reader(csv_file)
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines (e.g. a trailing empty
            # line); skip them instead of failing on row[0].
            continue
        sepal_length.append(float(row[0]))
        sepal_width.append(float(row[1]))
        petal_length.append(float(row[2]))
        petal_width.append(float(row[3]))
        iris_class.append(row[4])

# One float array per measurement column, plus an array of label strings
sepal_length = np.array(sepal_length, dtype=float)
sepal_width = np.array(sepal_width, dtype=float)
petal_length = np.array(petal_length, dtype=float)
petal_width = np.array(petal_width, dtype=float)
iris_class = np.array(iris_class)

Observe the format of the data by printing 10 randomly chosen examples.

# Print a table of up to 10 distinct, randomly chosen rows of the dataset.
# The index range comes from the loaded data rather than a hard-coded 0..149,
# so this works for any number of rows and cannot index past the end.
sample_size = min(10, len(iris_class))
# random.sample draws without replacement, so the indices are already unique
h = set(random.sample(range(len(iris_class)), sample_size))

print("ID \t SEPAL_LENGTH \t SEPAL_WIDTH \t PETAL_LENGTH \t PETAL_WIDTH \t|| CLASS")
for i in h:
    # One line per sampled row: its index, the four measurements, and the label
    print("%d \t %.1f \t\t %.1f \t\t %.1f \t\t %.1f \t\t|| %s"
          % (i, sepal_length[i], sepal_width[i], petal_length[i], petal_width[i], iris_class[i]))

ID 	 SEPAL_LENGTH 	 SEPAL_WIDTH 	 PETAL_LENGTH 	 PETAL_WIDTH 	|| CLASS
33 	    5.5 		    4.2 		    1.4 		    0.2 		|| setosa
132     6.4 		    2.8 		    5.6 		    2.2 		|| virginica
135     7.7 		    3.0 		    6.1 		    2.3 		|| virginica
77 	    6.7 		    3.0 		    5.0 		    1.7 		|| versicolor
14 	    5.8 		    4.0 		    1.2 		    0.2 		|| setosa
15 	    5.7 		    4.4 		    1.5 		    0.4 		|| setosa
146     6.3 		    2.5 		    5.0 		    1.9 		|| virginica
54 	    6.5 		    2.8 		    4.6 		    1.5 		|| versicolor
58 	    6.6 		    2.9 		    4.6 		    1.3 		|| versicolor
31 	    5.4 		    3.4 		    1.5 		    0.4 		|| setosa

Plot dataset.

# Scatter-plot the three species in two feature spaces, side by side.
species_names = ("setosa", "versicolor", "virginica")
plt.figure(figsize=(12, 5))

# Left panel: sepal length on the x-axis, sepal width on the y-axis,
# with one scatter series per species so each gets its own colour.
plt.subplot(1, 2, 1)
for species in species_names:
    members = iris_class == species
    plt.scatter(sepal_length[members], sepal_width[members])
plt.legend(species_names)
plt.xlabel('sepal_length')
plt.ylabel('sepal_width')

# Right panel: the same layout, using the petal measurements instead.
plt.subplot(1, 2, 2)
for species in species_names:
    members = iris_class == species
    plt.scatter(petal_length[members], petal_width[members])
plt.legend(species_names)
plt.xlabel('petal_length')
plt.ylabel('petal_width')

plt.show()
# Author's observation from the figure: the sepal measurements (left panel)
# do not separate the species, while the petal measurements (right panel) do,
# which is why the petal values are the ones used afterwards.

2 Preprocess dataset and Divide dataset

Through observing the figure above, we can find that it’s easier to classify iris flowers by petal_width and petal_length. So, we can simplify the problem by ignoring sepal_width and sepal_length. (This kind of operation may decrease accuracy)

# Feature matrix: one row per flower, columns are (petal_length, petal_width)
dataset_x = np.column_stack((petal_length, petal_width))

# Integer labels: setosa -> 0, versicolor -> 1, virginica -> 2.
# The array starts out as zeros, so any label other than these three stays 0.
dataset_y = np.zeros(iris_class.shape, dtype=int)
for class_id, class_name in enumerate(('setosa', 'versicolor', 'virginica')):
    dataset_y[iris_class == class_name] = class_id

# The first 80% of the shuffled rows become the training set
num = len(iris_class)
splitLine = int(0.8 * num)

# Shuffle the row indices so that the split picks rows at random
index = list(range(num))
random.shuffle(index)

# Slice the shuffled index list once per split, then gather the rows
train_rows = index[:splitLine]
test_rows = index[splitLine:]
train_x, train_y = dataset_x[train_rows], dataset_y[train_rows]
test_x, test_y = dataset_x[test_rows], dataset_y[test_rows]

Encode the class labels as one-hot vectors.
For example:
0 → [1, 0, 0]
1 → [0, 1, 0]
2 → [0, 0, 1]

# One-hot encode the integer labels of both splits (3 classes -> vectors of length 3)
train_y_onehot, test_y_onehot = (
    keras.utils.to_categorical(labels, num_classes=3)
    for labels in (train_y, test_y)
)

Let’s plot the training set and the test set with different markers.

# Show the training and test samples in a single figure for comparison.
plt.figure(figsize=(8, 6))
# The marker tells the split apart ('+' = training set, 'v' = test set);
# the colour tells the species apart (red = 0, green = 1, blue = 2).
# Series are drawn training set first, in class order, to match the legend.
for features, labels, marker in ((train_x, train_y, '+'), (test_x, test_y, 'v')):
    for class_id, color in enumerate(('red', 'green', 'blue')):
        in_class = labels == class_id
        plt.scatter(features[in_class, 0], features[in_class, 1], marker=marker, color=color)
# Legend entries follow the order in which the six series were drawn
plt.legend(("train_setosa","train_versicolor","train_virginica","test_setosa","test_versicolor","test_virginica"))
plt.xlabel('petal_length')
plt.ylabel('petal_width')
plt.show()

3 Describe your neural network

# Build the classifier layer by layer: a 2-feature input, two hidden layers
# of 64 ReLU units each, and a 3-unit softmax output (one unit per class).
model = keras.Sequential()
model.add(layers.Dense(units=64, activation='relu', input_shape=(2,)))
model.add(layers.Dense(units=64, activation='relu'))
model.add(layers.Dense(units=3, activation='softmax'))

# Print the layer-by-layer overview with output shapes and parameter counts
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
dense_3 (Dense)              (None, 64)                192       
_________________________________________________________________
dense_4 (Dense)              (None, 64)                4160      
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 195       
=================================================================
Total params: 4,547
Trainable params: 4,547
Non-trainable params: 0
_________________________________________________________________

We can use Mean Squared Error (MSE) as the loss function and Stochastic Gradient Descent (SGD) as the optimizer, i.e. the algorithm used to train the model.
(Note: MSE is not well suited to classification; a cross-entropy loss is the more common choice with a softmax output.)

# Configure training: mean squared error as the loss, plain SGD as the optimizer
model.compile(loss='mse', optimizer='sgd')

4 Training

Train the network on the training set for 100 epochs with a batch size of 4.

# Fit the network on the training split: 100 epochs, mini-batches of 4
# samples, with progress output enabled (verbose=1).
model.fit(
    x=train_x,
    y=train_y_onehot,
    batch_size=4,
    epochs=100,
    verbose=1,
)
# fit() also accepts callbacks, validation_split, validation_data, shuffle,
# class_weight, sample_weight and initial_epoch; see the Keras Model.fit
# documentation for their meaning and default values.

5 Evaluation of the trained model

We can use `.evaluate` to compute the loss on the test set.

# Compute the loss on the held-out test set, i.e. how far the predictions
# are from the one-hot ground truth
cost = model.evaluate(test_x, test_y_onehot)
print('test cost:', cost)

# Predict the class scores (the 3 softmax outputs) for every test sample
scores = model.predict(test_x)
print(scores)

# Turn each row of class scores into the index of its highest-scoring class.
# A single np.argmax along axis 1 replaces the per-row np.append loop (which
# re-allocated the array on every iteration) and yields integer class ids,
# the same kind of label that test_y holds.
results = np.argmax(scores, axis=1)
print(results)
# Example output from one run (it varies with the random train/test split):
# [0 2 0 2 1 2 1 1 2 2 2 2 0 0 1 2 2 0 2 0 2 2 1 1 0 2 2 1 1 1]

Visualize the model's predictions.

# One wide figure holding three panels side by side
plt.figure(figsize=(15, 5))

# Panel 1: the test samples coloured by their true class
plt.subplot(1, 3, 1)
for class_id, color in enumerate(('red', 'green', 'blue')):
    is_class = test_y == class_id
    plt.scatter(test_x[is_class, 0], test_x[is_class, 1], marker='v', color=color)
plt.legend(("setosa","versicolor","virginica"))
plt.xlabel('petal_length')
plt.ylabel('petal_width')
plt.title('Ground Truth of Testset')

# Panel 2: the same test samples, coloured by the class the model predicted
plt.subplot(1, 3, 2)
for class_id, color in enumerate(('red', 'green', 'blue')):
    predicted_as = results == class_id
    plt.scatter(test_x[predicted_as, 0], test_x[predicted_as, 1], color=color)
plt.legend(("setosa","versicolor","virginica"))
plt.xlabel('petal_length')
plt.ylabel('petal_width')
plt.title('Prediction of Testset')

# Panel 3: classification areas.
# Sample the rectangle spanned by the test set's petal length/width on a
# regular grid with a 0.05 step, classify every grid point, and colour it by
# the predicted class.
x = np.arange(test_x[:, 0].min(), test_x[:, 0].max(), 0.05)
y = np.arange(test_x[:, 1].min(), test_x[:, 1].max(), 0.05)
X, Y = np.meshgrid(x, y)
# Flatten the mesh into an (n_points, 2) array of (petal_length, petal_width)
grid = np.column_stack((X.ravel(), Y.ravel()))
scores_1 = model.predict(grid)
# Vectorized argmax: the grid holds thousands of points, so growing an array
# with np.append once per point would copy it on every iteration.
decisions = np.argmax(scores_1, axis=1)

plt.subplot(1, 3, 3)
for class_id, color in enumerate(('red', 'green', 'blue')):
    in_region = decisions == class_id
    plt.scatter(grid[in_region, 0], grid[in_region, 1], color=color)
plt.legend(("setosa","versicolor","virginica"))
plt.xlabel('petal_length')
plt.ylabel('petal_width')
plt.title('Classification Areas')

plt.show()