As I dive deeper and deeper into TensorFlow, computer vision is an area that grabbed my attention -- fast. I decided to create a little project to cement my learning in this area, and chose a fruit and vegetable classifier because of something that caught my eye at my local supermarket's self-assisted checkouts: whenever you put some produce on the scale, the machine narrows down the options on the screen to just a few (sometimes only one) based on what it perceives to be in front of the camera, making the process easier for the customer. Sheer computer vision in action.
I decided to do something similar while testing the limits of some models I created on the fly. I used two datasets for this exercise: the Fruits-360 dataset and the GroceryStoreDataset.
Huge kudos to their creators for putting these datasets out there.
The data does not come ready in the format needed by old, trustworthy image_dataset_from_directory,
so I had to prepare it. For this I used these scripts (a sketch of the idea is below). I also decided to remove some data from the GroceryStore dataset (specifically, things that come in packages), so that both datasets included fruits and veggies only.
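For reference, a minimal sketch of the kind of reshuffling those scripts do, assuming the source images arrive grouped in per-class folders (the paths and glob pattern here are hypothetical):
import shutil
from pathlib import Path
# Hypothetical locations; the real scripts follow each dataset's own layout
src_root = Path('data/raw')
dst_root = Path('data/train_labeled')
# image_dataset_from_directory expects one subdirectory per class,
# each containing only that class's images
for img_path in src_root.glob('*/*.jpg'):
    class_name = img_path.parent.name  # label taken from the parent folder name
    class_dir = dst_root / class_name
    class_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(img_path, class_dir / img_path.name)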
I managed to optimise a simple model to excel at classifying fruits and vegetables in curated, standard photos (the Fruit 360 dataset), but, as expected, the model failed miserably when real-world, complex images with messy backgrounds were used (the GroceryStore dataset). I tried deepening the model and re-training, but that did not work either. Only when I brought in an EfficientNet model did things improve, achieving a final accuracy of ~80% on very, very noisy data, and good performance on some random images I downloaded from the internet to test the model. This project was a great learning exercise, so I am proudly sharing the results with you here.
As with previous entries, if you have any questions, suggestions, or comments, do drop me a line via the portfolio's main page!
Thanks to Daniel Bourke for the course and for some of the helpful functions I borrowed.
# Libraries
import tensorflow as tf
import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
import random
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import mixed_precision
# Compute in float16 where safe, keep variables in float32 (faster training on modern GPUs)
mixed_precision.set_global_policy('mixed_float16')
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Activation
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras import Sequential
from tensorflow.keras import layers
from sklearn.utils.class_weight import compute_class_weight
import warnings
warnings.filterwarnings('ignore')
# Aesthetics for plots
sns.set_theme(
context='talk',
font_scale=0.9,
palette = ['#0F00F5', '#3061FF', '#9AB1FF', '#CDD9FF', '#E6ECFF','#E5E5E5',
'#B6BBCB', '#878B98','#696A6F','#292A2E'],
style = {
'axes.facecolor': '#FFFFFF',
'axes.edgecolor': '#000000',
'legend.edgecolor': '#FFFFFF',
'axes.grid': False,
'axes.axisbelow': 'line',
'axes.labelcolor': 'black',
'figure.facecolor': '#FFFFFF',
'grid.color': '#b0b0b0',
'grid.linestyle': '-',
'text.color': 'black',
'xtick.color': 'black',
'ytick.color': 'black',
'xtick.direction': 'out',
'ytick.direction': 'out',
'patch.edgecolor': '#FFFFFF',
'patch.force_edgecolor': True,
'image.cmap': 'viridis',
'font.family': ['sans-serif'],
'font.sans-serif': 'Helvetica Neue',
'xtick.bottom': False,
'xtick.top': False,
'ytick.left': False,
'ytick.right': False,
'axes.spines.left': False,
'axes.spines.bottom': False,
'axes.spines.right': False,
'axes.spines.top': False
}
)
# Create the train and test dir paths
train_dir = 'data/fruits-360-original-size/train_labeled'
test_dir = 'data/fruits-360-original-size/test_labeled'
# Flow data
IMG_SIZE = (224,224)
train_data = image_dataset_from_directory(
directory=train_dir,
image_size=IMG_SIZE,
batch_size=64,
label_mode='categorical'
)
test_data = image_dataset_from_directory(
directory=test_dir,
image_size=IMG_SIZE,
batch_size=64,
label_mode='categorical'
)
Found 29222 files belonging to 20 classes.
Found 14527 files belonging to 20 classes.
# Visualise random images from training dataset
target_classes = random.choices(train_data.class_names, k=9)
random_images_path = []
for data_class in target_classes:
target_dir = train_dir + '/' + data_class
random_image = random.choice(os.listdir(target_dir))
random_images_path.append(train_dir + '/' + data_class + '/' + random_image)
fig, axes = plt.subplots(3,3,figsize=(9,9))
for path, ax in zip(random_images_path, axes.ravel()):
img_plot = mpimg.imread(path)
img_class = path.split('/')[-2]
ax.imshow(img_plot)
ax.set_title(f'Random Image\nclass: {img_class}');
ax.axis(False);
# Create class weight dict for training, to deal with class imbalances
all_labels = []
for _, labels in train_data.unbatch():
class_index = tf.argmax(labels).numpy()
all_labels.append(class_index)
all_labels = np.array(all_labels)
class_names = train_data.class_names
num_classes = len(class_names)
class_weights_array = compute_class_weight(
class_weight='balanced',
classes=np.arange(num_classes),
y=all_labels
)
class_weights = dict(enumerate(class_weights_array))
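As a quick sanity check, sklearn's 'balanced' mode simply weights each class inversely to its frequency, i.e. n_samples / (n_classes * count(class)):
# Recompute the 'balanced' weights by hand and compare
counts = np.bincount(all_labels, minlength=num_classes)
manual_weights = len(all_labels) / (num_classes * counts)
assert np.allclose(class_weights_array, manual_weights)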
# Create a base model
tf.random.set_seed(0)
data_augmentation = Sequential([
layers.RandomFlip('horizontal'),
layers.RandomContrast(0.1),
layers.RandomZoom(.2)
],
name = 'data_augmentation'
)
inputs = layers.Input(shape=IMG_SIZE+(3,), name='input_layer')
x = data_augmentation(inputs)
x = tf.keras.layers.Rescaling(1./255)(x)
x = layers.Conv2D(32, 3, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPool2D()(x)
x = layers.Dropout(0.3)(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPool2D()(x)
x = layers.Dropout(0.3)(x)
x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(
    len(train_data.class_names),
    activation='softmax',
    name='output_layer',
    dtype='float32' # keep the softmax output in float32 for numerical stability under mixed precision
)(x)
model_0 = tf.keras.Model(inputs, outputs, name='Model_0')
model_0.summary()
Model: "Model_0"
Layer (type)                                Output Shape            Param #
input_layer (InputLayer)                    (None, 224, 224, 3)     0
cast (Cast)                                 (None, 224, 224, 3)     0
data_augmentation (Sequential)              (None, 224, 224, 3)     0
rescaling (Rescaling)                       (None, 224, 224, 3)     0
conv2d (Conv2D)                             (None, 222, 222, 32)    896
batch_normalization (BatchNormalization)    (None, 222, 222, 32)    128
conv2d_1 (Conv2D)                           (None, 220, 220, 32)    9,248
max_pooling2d (MaxPooling2D)                (None, 110, 110, 32)    0
dropout (Dropout)                           (None, 110, 110, 32)    0
conv2d_2 (Conv2D)                           (None, 108, 108, 64)    18,496
batch_normalization_1 (BatchNormalization)  (None, 108, 108, 64)    256
conv2d_3 (Conv2D)                           (None, 106, 106, 64)    36,928
max_pooling2d_1 (MaxPooling2D)              (None, 53, 53, 64)      0
dropout_1 (Dropout)                         (None, 53, 53, 64)      0
global_average_pooling2d (GlobalAveragePooling2D)  (None, 64)       0
cast_1 (Cast)                               (None, 64)              0
output_layer (Dense)                        (None, 34)              2,210
Total params: 68,162 (266.26 KB)
Trainable params: 67,970 (265.51 KB)
Non-trainable params: 192 (768.00 B)
# Prefetch data for faster training
tf.random.set_seed(0)
AUTOTUNE = tf.data.AUTOTUNE
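# cache() keeps decoded images in memory after the first pass; prefetch() overlaps
# input preparation with model training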
train_data = train_data.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
test_data = test_data.cache().prefetch(buffer_size=AUTOTUNE)
# Initial setup and training
checkpoint_path = f'model_checkpoint_weights/{model_0.name}_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath = checkpoint_path,
save_weights_only = True,
save_best_only = False,
save_freq = 'epoch', #save every epoch
verbose = 1
)
early_stop_callback = tf.keras.callbacks.EarlyStopping(
monitor='val_accuracy',
patience=3,
restore_best_weights=True,
verbose=1
)
model_0.compile(loss=CategoricalCrossentropy(),
optimizer=Adam(),
metrics=['accuracy'])
history_0 = model_0.fit(
train_data,
epochs=30,
steps_per_epoch=int(len(train_data)*.25),
validation_data=test_data,
validation_steps=int(len(test_data)*.25),
class_weight=class_weights,
callbacks=[checkpoint_callback, early_stop_callback]
)
#Save the model
model_0.save('models/model_0.keras')
#Save history
history_0_df = pd.DataFrame(history_0.history)
# Plot curves
def plot_train_val_curves(history, model_name=None):
    history_df = history.copy()
    # Rename by key (not by position) so the column order of History.history doesn't matter
    history_df = history_df.rename(columns={
        'accuracy': 'Training_Accuracy', 'loss': 'Training_Loss',
        'val_accuracy': 'Validation_Accuracy', 'val_loss': 'Validation_Loss'
    })
    history_df['Epoch'] = history_df.index + 1
    metric_cols = [c for c in ['Training_Accuracy', 'Training_Loss',
                               'Validation_Accuracy', 'Validation_Loss'] if c in history_df.columns]
    history_melt = pd.melt(history_df, id_vars='Epoch', value_vars=metric_cols)
    history_melt['Event'] = [i.split('_')[0] for i in history_melt['variable']]
    history_melt['Type'] = [i.split('_')[1] for i in history_melt['variable']]
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    sns.lineplot(data=history_melt[history_melt['Type']=='Accuracy'], x='Epoch', y='value', hue='Event', ax=axes[0], palette='bright').set_title('Accuracy')
    sns.lineplot(data=history_melt[history_melt['Type']=='Loss'], x='Epoch', y='value', hue='Event', ax=axes[1], palette='bright').set_title('Loss')
    # Epoch whose weights early stopping restored (best validation accuracy)
    epoch_stopped = np.argmax(history_df['Validation_Accuracy']) + 1
    axes[0].legend().get_frame().set_linewidth(0)
    axes[1].legend().get_frame().set_linewidth(0)
    axes[0].set_ylim(0, 1)
    axes[1].set_ylim(0,)
    axes[0].axvline(x=epoch_stopped, ls='--', color='r', lw=1)
    axes[1].axvline(x=epoch_stopped, ls='--', color='r', lw=1)
    axes[0].text(x=epoch_stopped+.2, y=axes[0].get_ylim()[0], s='Model Restored\nby Early Stop', color='r', fontsize=10)
    axes[1].text(x=epoch_stopped+.2, y=axes[1].get_ylim()[1]*1.05, s='Model Restored\nby Early Stop', color='r', fontsize=10)
    axes[0].grid(axis='y', linewidth=0.3)
    axes[1].grid(axis='y', linewidth=0.3)
    axes[0].set_ylabel('')
    axes[1].set_ylabel('')
    ylabels = axes[0].get_yticks()
    axes[0].set_yticks(ticks=ylabels, labels=[f'{i*100:.0f}%' for i in ylabels])
    plt.suptitle(f'{model_name} Performance');
plot_train_val_curves(history_0_df, 'Model 0')
# Evaluate the model on the full test dataset
model_0_eval = model_0.evaluate(test_data, return_dict=True)
for metric, value in model_0_eval.items():
print(f'{str(metric).title()}: {value:.3f}')
Accuracy: 0.883
Loss: 0.306
# Make preds
model_0_pred = model_0.predict(test_data)
model_0_pred = np.argmax(model_0_pred, axis=1)
# Unbatch and collect true labels
y_true = []
for _, label in test_data.unbatch():
y_true.append(label.numpy())
# Convert to a np array
y_true = np.array(y_true)
# Collect labels
y_true_argmax = [np.argmax(y_true[i]) for i in range(len(y_true))]
# Make confusion matrix to assess overall performance
import itertools
from sklearn.metrics import confusion_matrix
def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=10, norm=False, savefig=False, title='Confusion Matrix'):
# Create the confusion matrix
cm = confusion_matrix(y_true, y_pred)
cm_norm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis] # normalize it
n_classes = cm.shape[0] # find the number of classes we're dealing with
# Plot the figure and make it pretty
fig, ax = plt.subplots(figsize=figsize)
cax = ax.matshow(cm, cmap=plt.cm.Blues) # colors will represent how 'correct' a class is, darker == better
#fig.colorbar(cax)
# Are there a list of classes?
if classes:
labels = classes
else:
labels = np.arange(cm.shape[0])
# Label the axes
ax.set(title=title,
xlabel="Predicted label",
ylabel="True label",
xticks=np.arange(n_classes), # create enough axis slots for each class
yticks=np.arange(n_classes),
xticklabels=labels, # axes will be labeled with class names (if they exist) or ints
yticklabels=labels)
# Make x-axis labels appear on bottom
ax.xaxis.set_label_position("bottom")
ax.xaxis.tick_bottom()
ax.set_xticklabels(labels,rotation=90)
# Set the threshold for different colors
threshold = (cm.max() + cm.min()) / 2.
# Plot the text on each cell
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
if norm:
plt.text(j, i, f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)",
horizontalalignment="center",
color="white" if cm[i, j] > threshold else "black",
size=text_size)
else:
plt.text(j, i, f"{cm[i, j]}",
horizontalalignment="center",
color="white" if cm[i, j] > threshold else "black",
size=text_size)
# Save the figure to the current working directory
if savefig:
fig.savefig("confusion_matrix.png")
make_confusion_matrix(y_true_argmax, model_0_pred, classes=class_names, title='Model 0 Confusion Matrix')
# See how the model is performing with images and predictions
def load_and_prep_image(filename, img_shape=224, scale=True):
img = tf.io.read_file(filename) # read the img
img = tf.io.decode_image(img, channels=3) #decode img into tensor
img = tf.image.resize(img, [img_shape, img_shape]) #resize the image
#Scale? Yes/No
if scale:
return img/255.
else:
return img
def plot_model_images(model):
plt.figure(figsize=(17,10))
for i in range(9):
#Choose random imgs
class_name = random.choice(class_names)
filename = random.choice(os.listdir(test_dir + '/' + class_name))
filepath = test_dir + '/' + class_name + '/' + filename
#load the imgs and make preds
img = load_and_prep_image(filepath, scale=False) #load img
img_expanded = tf.expand_dims(img, axis=0) #Expand dims to match the model's expected shape
pred_prob = model.predict(img_expanded) #get probs array
pred_class = class_names[pred_prob.argmax()] #get highest pred prob and attach to class
plt.subplot(3,3,i+1)
plt.imshow(img/255.)
if class_name == pred_class:
title_color = 'g'
else:
title_color = 'r'
plt.title(f'Actual: {class_name}, pred: {pred_class}\nprob: {pred_prob.max():.2f}', c=title_color)
plt.axis(False)
plot_model_images(model_0)
# Train the model some more with an LR scheduler to see if I can improve it
model_1 = tf.keras.models.clone_model(model_0)
model_1.set_weights(model_0.get_weights())
model_1.name = 'Model_1'
model_1.summary()
Model: "Model_1"
Layer (type)                                Output Shape            Param #
input_layer (InputLayer)                    (None, 224, 224, 3)     0
cast_2 (Cast)                               (None, 224, 224, 3)     0
data_augmentation (Sequential)              (None, 224, 224, 3)     0
rescaling (Rescaling)                       (None, 224, 224, 3)     0
conv2d (Conv2D)                             (None, 222, 222, 32)    896
batch_normalization (BatchNormalization)    (None, 222, 222, 32)    128
conv2d_1 (Conv2D)                           (None, 220, 220, 32)    9,248
max_pooling2d (MaxPooling2D)                (None, 110, 110, 32)    0
dropout (Dropout)                           (None, 110, 110, 32)    0
conv2d_2 (Conv2D)                           (None, 108, 108, 64)    18,496
batch_normalization_1 (BatchNormalization)  (None, 108, 108, 64)    256
conv2d_3 (Conv2D)                           (None, 106, 106, 64)    36,928
max_pooling2d_1 (MaxPooling2D)              (None, 53, 53, 64)      0
dropout_1 (Dropout)                         (None, 53, 53, 64)      0
global_average_pooling2d (GlobalAveragePooling2D)  (None, 64)       0
cast_3 (Cast)                               (None, 64)              0
output_layer (Dense)                        (None, 34)              2,210
Total params: 68,162 (266.26 KB)
Trainable params: 67,970 (265.51 KB)
Non-trainable params: 192 (768.00 B)
checkpoint_path = f'model_checkpoint_weights/{model_1.name}_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath = checkpoint_path,
save_weights_only = True,
save_best_only = False,
save_freq = 'epoch', #save every epoch
verbose = 1
)
early_stop_callback = tf.keras.callbacks.EarlyStopping(
monitor='val_accuracy',
patience=5,
restore_best_weights=True,
verbose=1
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
monitor='val_accuracy',
factor=0.5,
patience=3,
min_lr=1e-6,
verbose=1
)
model_1.compile(loss=CategoricalCrossentropy(),
optimizer=Adam(),
metrics=['accuracy'])
history_1 = model_1.fit(
train_data,
epochs=30,
steps_per_epoch=int(len(train_data)*.25),
validation_data=test_data,
validation_steps=int(len(test_data)*.25),
class_weight=class_weights,
callbacks=[checkpoint_callback, early_stop_callback, reduce_lr]
)
#Save the model
model_1.save('models/model_1.keras')
#Save history
history_1_df = pd.DataFrame(history_1.history)
#Evaluate new model accuracy
model_1_eval = model_1.evaluate(test_data, return_dict=True)
for metric, value in model_1_eval.items():
print(f'{str(metric).title()}: {value:.3f}')
Accuracy: 0.956
Loss: 0.137
# Plot curves
plot_train_val_curves(history_1_df.iloc[:, :-1], 'Model 1')
# Make preds and plot confusion matrix
model_1_pred = model_1.predict(test_data)
model_1_pred = np.argmax(model_1_pred, axis=1)
make_confusion_matrix(y_true_argmax, model_1_pred, classes=class_names, title='Model 1 Confusion Matrix')
# Let's see how it is performing with images and predictions
plot_model_images(model_1)
# Test performance with internet pics
def pred_and_plot(model=None, final_test_dir=None, class_names=None):
plt.figure(figsize=(10,10))
# Choose 9 random images
filepaths = [final_test_dir + '/' + fname for fname in os.listdir(final_test_dir)]
imgs = random.choices(filepaths, k=9)
for i in range(9):
img_path = imgs[i]
img = load_and_prep_image(img_path, scale=False)
img_expanded = tf.expand_dims(img, axis=0)
pred_prob = model.predict(img_expanded)
pred_class = class_names[pred_prob.argmax()]
plt.subplot(3,3,i+1)
plt.imshow(img/255.)
plt.title(f'pred: {pred_class}\nprob: {pred_prob.max():.2f}')
plt.axis(False)
plt.subplots_adjust(hspace=.3, wspace=.01)
pred_and_plot(model_1, final_test_dir='data/final_test_img', class_names=class_names)
# Bad performance on complex images, as expected. Let's move on to using more complex images for training
# Now to train the same model as before, but with the new data, and see how it goes
# Create the train and test dir paths
train_dir = 'data/GroceryStoreDataset/dataset/train_labeled'
test_dir = 'data/GroceryStoreDataset/dataset/test_labeled'
IMG_SIZE = (224,224)
# Flow data
train_data = image_dataset_from_directory(
directory=train_dir,
image_size=IMG_SIZE,
batch_size=64,
label_mode='categorical'
)
test_data = image_dataset_from_directory(
directory=test_dir,
image_size=IMG_SIZE,
batch_size=64,
label_mode='categorical'
)
Found 1776 files belonging to 34 classes.
Found 1704 files belonging to 34 classes.
# Visualise random images from training dataset
target_classes = random.choices(train_data.class_names, k=9)
random_images_path = []
for data_class in target_classes:
target_dir = train_dir + '/' + data_class
random_image = random.choice(os.listdir(target_dir))
random_images_path.append(train_dir + '/' + data_class + '/' + random_image)
fig, axes = plt.subplots(3,3,figsize=(9,9))
for path, ax in zip(random_images_path, axes.ravel()):
img_plot = mpimg.imread(path)
img_class = path.split('/')[-2]
ax.imshow(img_plot)
ax.set_title(f'Random Image\nclass: {img_class}');
ax.axis(False);
plt.subplots_adjust(hspace=.3)
# Create class weight dict for training to handle class imbalance
all_labels = []
for _, labels in train_data.unbatch():
class_index = tf.argmax(labels).numpy()
all_labels.append(class_index)
all_labels = np.array(all_labels)
class_names = train_data.class_names
num_classes = len(class_names)
class_weights_array = compute_class_weight(
class_weight='balanced',
classes=np.arange(num_classes),
y=all_labels
)
class_weights = dict(enumerate(class_weights_array))
# Transfer the earlier model as a feature extractor and attach a new head, since the data and the number of classes are different
feature_extractor = tf.keras.Model(
inputs=model_1.input,
outputs=model_1.layers[-2].output
)
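# Freeze the convolutional base so that, at first, only the new classification head trains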
feature_extractor.trainable = False
outputs = layers.Dense(
len(class_names), activation='softmax', name='output_layer', dtype='float32'
)(feature_extractor.output)
model_2 = tf.keras.Model(feature_extractor.input, outputs, name='Model_2')
model_2.summary()
Model: "Model_2"
Layer (type)                                Output Shape            Param #
input_layer (InputLayer)                    (None, 224, 224, 3)     0
cast_2 (Cast)                               (None, 224, 224, 3)     0
data_augmentation (Sequential)              (None, 224, 224, 3)     0
rescaling (Rescaling)                       (None, 224, 224, 3)     0
conv2d (Conv2D)                             (None, 222, 222, 32)    896
batch_normalization (BatchNormalization)    (None, 222, 222, 32)    128
conv2d_1 (Conv2D)                           (None, 220, 220, 32)    9,248
max_pooling2d (MaxPooling2D)                (None, 110, 110, 32)    0
dropout (Dropout)                           (None, 110, 110, 32)    0
conv2d_2 (Conv2D)                           (None, 108, 108, 64)    18,496
batch_normalization_1 (BatchNormalization)  (None, 108, 108, 64)    256
conv2d_3 (Conv2D)                           (None, 106, 106, 64)    36,928
max_pooling2d_1 (MaxPooling2D)              (None, 53, 53, 64)      0
dropout_1 (Dropout)                         (None, 53, 53, 64)      0
global_average_pooling2d (GlobalAveragePooling2D)  (None, 64)       0
cast_4 (Cast)                               (None, 64)              0
output_layer (Dense)                        (None, 34)              2,210
Total params: 68,162 (266.26 KB)
Trainable params: 2,210 (8.63 KB)
Non-trainable params: 65,952 (257.62 KB)
# Repeat a similar training setup to the one used for model 0
tf.random.set_seed(0)
AUTOTUNE = tf.data.AUTOTUNE
train_data = train_data.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
test_data = test_data.cache().prefetch(buffer_size=AUTOTUNE)
checkpoint_path = f'model_checkpoint_weights/{model_2.name}_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath = checkpoint_path,
save_weights_only = True,
save_best_only = True,
monitor='val_accuracy',
save_freq = 'epoch', #save every epoch
verbose = 1
)
early_stop_callback = tf.keras.callbacks.EarlyStopping(
monitor='val_accuracy',
patience=10,
restore_best_weights=True,
verbose=1
)
model_2.compile(loss=CategoricalCrossentropy(),
optimizer=Adam(),
metrics=['accuracy'])
history_2 = model_2.fit(
train_data,
epochs=100,
steps_per_epoch=int(len(train_data)*.25),
validation_data=test_data,
validation_steps=int(len(test_data)*.25),
class_weight=class_weights,
callbacks=[checkpoint_callback,early_stop_callback]
)
#Save the model
model_2.save('models/model_2.keras')
#Save history
history_2_df = pd.DataFrame(history_2.history)
#Plot curves
plot_train_val_curves(history_2_df, 'Model 2')
#Fine tune
feature_extractor.trainable=True
for layer in feature_extractor.layers[:-8]:
layer.trainable = False
checkpoint_path = f'model_checkpoint_weights/{model_2.name}_finetuned_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_path,
    monitor='val_accuracy',
    save_weights_only = True,
    save_best_only = True,
    save_freq = 'epoch', #save every epoch
    verbose = 1
)
model_2.compile(loss=CategoricalCrossentropy(),
optimizer=Adam(learning_rate=0.0001),
metrics=['accuracy'])
early_stop_callback = tf.keras.callbacks.EarlyStopping(
monitor='val_accuracy',
patience=10,
restore_best_weights=True,
verbose=1
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
monitor='val_accuracy',
factor=0.5,
patience=5,
min_lr=1e-7,
verbose=1
)
history_2_finetuned = model_2.fit(
train_data,
epochs=200,
steps_per_epoch=int(len(train_data)*.25),
validation_data=test_data,
validation_steps=int(len(test_data)*.25),
class_weight=class_weights,
callbacks=[checkpoint_callback, early_stop_callback, reduce_lr]
)
#Save the model
model_2.save('models/model_2_finetuned.keras')
#Save history
history_2_finetuned_df = pd.DataFrame(history_2_finetuned.history)
#Plot curves
plot_train_val_curves(history_2_finetuned_df.iloc[:,:4], 'Model 2 Fine-tuned')
# Pretty bad performance, probably because the model is not deep enough and the images are messy. Let's try a deeper model
inputs = layers.Input(shape=IMG_SIZE+(3,), name='input_layer')
data_augmentation = Sequential([
layers.RandomFlip('horizontal'),
layers.RandomContrast(0.1),
layers.RandomZoom(.2)
],
name = 'data_augmentation'
)
x = data_augmentation(inputs)
x = tf.keras.layers.Rescaling(1./255)(x)
x = layers.Conv2D(64, 3, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(64, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.MaxPooling2D()(x)
x = layers.Conv2D(128, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(128, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.3)(x)
x = layers.Conv2D(256, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(256, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.4)(x)
x = layers.Conv2D(512, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(512, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.5)(x)
x = layers.Conv2D(512, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.5)(x)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(len(class_names), activation='softmax', dtype='float32')(x)
model_3 = tf.keras.Model(inputs, outputs, name="Model_3")
model_3.summary()
Model: "Model_3"
Layer (type)                                Output Shape            Param #
input_layer (InputLayer)                    (None, 224, 224, 3)     0
cast (Cast)                                 (None, 224, 224, 3)     0
data_augmentation (Sequential)              (None, 224, 224, 3)     0
rescaling (Rescaling)                       (None, 224, 224, 3)     0
conv2d (Conv2D)                             (None, 222, 222, 64)    1,792
batch_normalization (BatchNormalization)    (None, 222, 222, 64)    256
conv2d_1 (Conv2D)                           (None, 222, 222, 64)    36,928
max_pooling2d (MaxPooling2D)                (None, 111, 111, 64)    0
conv2d_2 (Conv2D)                           (None, 111, 111, 128)   73,856
batch_normalization_1 (BatchNormalization)  (None, 111, 111, 128)   512
conv2d_3 (Conv2D)                           (None, 111, 111, 128)   147,584
max_pooling2d_1 (MaxPooling2D)              (None, 55, 55, 128)     0
dropout (Dropout)                           (None, 55, 55, 128)     0
conv2d_4 (Conv2D)                           (None, 55, 55, 256)     295,168
batch_normalization_2 (BatchNormalization)  (None, 55, 55, 256)     1,024
conv2d_5 (Conv2D)                           (None, 55, 55, 256)     590,080
max_pooling2d_2 (MaxPooling2D)              (None, 27, 27, 256)     0
dropout_1 (Dropout)                         (None, 27, 27, 256)     0
conv2d_6 (Conv2D)                           (None, 27, 27, 512)     1,180,160
batch_normalization_3 (BatchNormalization)  (None, 27, 27, 512)     2,048
conv2d_7 (Conv2D)                           (None, 27, 27, 512)     2,359,808
max_pooling2d_3 (MaxPooling2D)              (None, 13, 13, 512)     0
dropout_2 (Dropout)                         (None, 13, 13, 512)     0
conv2d_8 (Conv2D)                           (None, 13, 13, 512)     2,359,808
batch_normalization_4 (BatchNormalization)  (None, 13, 13, 512)     2,048
max_pooling2d_4 (MaxPooling2D)              (None, 6, 6, 512)       0
dropout_3 (Dropout)                         (None, 6, 6, 512)       0
global_average_pooling2d (GlobalAveragePooling2D)  (None, 512)      0
dense (Dense)                               (None, 128)             65,664
dropout_4 (Dropout)                         (None, 128)             0
dense_1 (Dense)                             (None, 34)              4,386
Total params: 7,121,122 (27.16 MB)
Trainable params: 7,118,178 (27.15 MB)
Non-trainable params: 2,944 (11.50 KB)
checkpoint_path = f'model_checkpoint_weights/{model_3.name}_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath = checkpoint_path,
save_weights_only = True,
save_best_only = True,
monitor='val_accuracy',
save_freq = 'epoch', #save every epoch
verbose = 1
)
early_stop_callback = tf.keras.callbacks.EarlyStopping(
monitor='val_accuracy',
patience=5,
restore_best_weights=True,
verbose=1
)
model_3.compile(loss=CategoricalCrossentropy(),
optimizer=Adam(),
metrics=['accuracy'])
history_3 = model_3.fit(
train_data,
epochs=100,
steps_per_epoch=int(len(train_data)*.25),
validation_data=test_data,
validation_steps=int(len(test_data)*.25),
class_weight=class_weights,
callbacks=[checkpoint_callback,early_stop_callback]
)
#Save the model
model_3.save('models/model_3.keras')
#Save history
history_3_df = pd.DataFrame(history_3.history)
#Plot curves
plot_train_val_curves(history_3_df.iloc[:,:4], 'Model 3')
model_3_eval = model_3.evaluate(test_data, return_dict=True)
for metric, value in model_3_eval.items():
print(f'{str(metric).title()}: {value:.3f}')
Accuracy: 0.085
Loss: 6.520
# Accuracy and performance are very low. This is not worth fine-tuning. Let's try a more complex, pre-trained EfficientNet model.
# Transfer learning from EffNet
base_model = tf.keras.applications.efficientnet_v2.EfficientNetV2S(include_top=False, input_shape=IMG_SIZE + (3,), weights='imagenet')
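# Freeze the pretrained backbone for the feature-extraction phase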
base_model.trainable = False
data_augmentation = tf.keras.Sequential([
layers.RandomFlip('horizontal'),
layers.RandomRotation(0.1),
layers.RandomZoom(0.1),
layers.RandomTranslation(0.1, 0.1),
layers.RandomContrast(0.1),
], name='data_augmentation')
# Set up model architecture with trainable top layers
inputs = layers.Input(shape=IMG_SIZE+(3,), name='input_layer')
x = data_augmentation(inputs)
# Preprocess before the base model; for EfficientNetV2 this is a pass-through,
# since the network rescales its own [0, 255] inputs internally
x = tf.keras.applications.efficientnet_v2.preprocess_input(x)
x = base_model(x, training=False)
x = layers.GlobalAveragePooling2D(name='global_avg_pool')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.0001))(x)
outputs = layers.Dense(
len(class_names),
activation='softmax',
name='output_layer',
dtype='float32'
)(x)
model_4 = tf.keras.Model(inputs,outputs, name='Model_4')
model_4.summary()
Model: "Model_4"
Layer (type)                                Output Shape            Param #
input_layer (InputLayer)                    (None, 224, 224, 3)     0
cast_9 (Cast)                               (None, 224, 224, 3)     0
data_augmentation (Sequential)              (None, 224, 224, 3)     0
efficientnetv2-s (Functional)               (None, 7, 7, 1280)      20,331,360
global_avg_pool (GlobalAveragePooling2D)    (None, 1280)            0
dropout_2 (Dropout)                         (None, 1280)            0
dense (Dense)                               (None, 128)             163,968
cast_10 (Cast)                              (None, 128)             0
output_layer (Dense)                        (None, 34)              4,386
Total params: 20,499,714 (78.20 MB)
Trainable params: 168,354 (657.63 KB)
Non-trainable params: 20,331,360 (77.56 MB)
checkpoint_path = f'model_checkpoint_weights/{model_4.name}_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath = checkpoint_path,
save_weights_only = True,
save_best_only = True,
monitor='val_accuracy',
save_freq = 'epoch',
verbose = 1
)
early_stop_callback = tf.keras.callbacks.EarlyStopping(
monitor='val_accuracy',
patience=6,
restore_best_weights=True,
verbose=1
)
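# Cosine schedule: smoothly anneal the learning rate from 1e-3 towards ~0 over 10,000 optimizer steps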
lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
initial_learning_rate=0.001,
decay_steps=10000
)
model_4.compile(loss=CategoricalCrossentropy(label_smoothing=0.1),
optimizer=Adam(learning_rate=lr_schedule),
metrics=['accuracy'])
history_4 = model_4.fit(
train_data,
epochs=30,
steps_per_epoch=int(len(train_data)*.25),
validation_data=test_data,
validation_steps=int(len(test_data)*.25),
class_weight=class_weights,
callbacks=[checkpoint_callback, early_stop_callback]
)
#Save the model
model_4.save('models/model_4.keras')
#Save history
history_4_df = pd.DataFrame(history_4.history)
#Plot curves
plot_train_val_curves(history_4_df.iloc[:,:4], 'Model 4')
model_4_eval = model_4.evaluate(test_data, return_dict=True)
for metric, value in model_4_eval.items():
print(f'{str(metric).title()}: {value:.3f}')
Accuracy: 0.668
Loss: 1.762
#That's much better. Now this can be fine-tuned.
# Fine tune
base_model.trainable=True
# Unfreeze the top N layers (fall back to a full fine-tune if accuracy doesn't improve)
n_layers = 150
for layer in base_model.layers[:-n_layers]:
layer.trainable = False
checkpoint_path = f'model_checkpoint_weights/{model_4.name}_finetuned_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath = checkpoint_path,
save_weights_only = True,
save_best_only = True,
monitor='val_accuracy',
mode='max',
verbose = 1
)
early_stop_callback = tf.keras.callbacks.EarlyStopping(
monitor='val_accuracy',
patience=10,
restore_best_weights=True,
mode='max',
verbose=1
)
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
monitor='val_loss',
factor=0.5,
patience=4,
min_lr=1e-7,
verbose=1
)
model_4.compile(loss=CategoricalCrossentropy(label_smoothing=0.1),
optimizer=Adam(learning_rate=1e-4),
metrics=['accuracy'])
history_4_finetuned = model_4.fit(
train_data,
epochs=100,
steps_per_epoch=int(len(train_data)),
validation_data=test_data,
validation_steps=int(len(test_data)),
class_weight=class_weights,
callbacks=[checkpoint_callback, early_stop_callback, reduce_lr]
)
#Save/load the model
model_4.save('models/model_4_finetuned.keras')
model_4_finetuned = tf.keras.models.load_model('models/model_4_finetuned.keras')
#Load history (saved earlier to CSV)
history_4_finetuned_df = pd.read_csv('model_history_records/history_4_finetuned.csv')
#Plot curves
plot_train_val_curves(history_4_finetuned_df.iloc[:,:4], 'Model 4 FT')
# Unbatch and collect true labels
y_true = []
for _, label in test_data.unbatch():
y_true.append(label.numpy())
# Convert to a np array
y_true = np.array(y_true)
# Collect labels
y_true_argmax = [np.argmax(y_true[i]) for i in range(len(y_true))]
# Make preds and plot confusion matrix
model_4_pred = model_4_finetuned.predict(test_data)
model_4_pred = np.argmax(model_4_pred, axis=1)
make_confusion_matrix(y_true_argmax, model_4_pred, classes=class_names, title='Model 4 FT Confusion Matrix')
plot_model_images(model_4)
# Much better performance on messy images
pred_and_plot(model_4, final_test_dir='data/final_test_img', class_names=class_names)
# Also much better performance on the noisy images from the internet!
Thanks for reading!