Supermarket Vision - Food and Veggie Classifier with TensorFlow¶

As I dive deeper and deeper into TensorFlow, computer vision is an area that grabbed my attention -- fast. I decided to create a little project to cement my learning in this area, and chose a fruit and vegetable classifier because of something I noticed at my local supermarket's self-service checkouts: whenever you put some produce on the scale, the machine narrows down the on-screen options to just a few (sometimes only one) based on what it perceives to be in front of the camera, making the process easier for the customer. Sheer computer vision in action.

I decided to do something similar while testing the limits of some models I created on the fly. I used two datasets for the exercise:

  • Mihai Oltean's Fruit 360 dataset, and
  • Marcus Klasson's GroceryStore dataset

Huge kudos to them for putting these datasets out there.

The data does not come ready in the format needed by the old, trustworthy image_dataset_from_directory, so I had to prepare it; for this I used these scripts. I also decided to remove some data from the GroceryStore dataset (specifically, things that came in packages), so that both datasets included fruits and veggies only.
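
For context, image_dataset_from_directory expects one sub-folder per class, each holding that class's images. The cell below is a minimal sketch of that kind of re-organisation -- it is not the actual prep scripts linked above, and all paths in it are hypothetical placeholders:

In [ ]:
# Minimal sketch (NOT the actual prep scripts): copy images into one
# sub-folder per class, the layout image_dataset_from_directory expects.
# All paths here are hypothetical placeholders.
import shutil
from pathlib import Path

src_root = Path('data/raw_dataset')    # placeholder: unprocessed images
dst_root = Path('data/train_labeled')  # placeholder: class-per-folder output

for img_path in src_root.rglob('*.jpg'):
    class_name = img_path.parent.name  # assumes the parent folder names the class
    class_dir = dst_root / class_name
    class_dir.mkdir(parents=True, exist_ok=True)
    shutil.copy2(img_path, class_dir / img_path.name)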

I managed to optimise a simple model to excel at classifying fruits and vegetables in curated, standard photos (from the Fruit 360 dataset), but, as expected, the model failed miserably when given real-world, complex images with messy backgrounds (GroceryStore dataset). I tried deepening the model and re-training, but it did not work. Only when I brought in an EfficientNet model did things improve, reaching a final accuracy of ~80% on very, very noisy data, plus good performance on some random images I downloaded from the internet to test the model. This project was a great learning exercise, so I am proudly sharing the results with you here.

As with previous entries, if you have any questions, suggestions, or comments, do drop me a line via the portfolio's main page!

Thanks to Daniel Bourke for the course and for some of the helpful functions I borrowed.

Libraries and Setup¶

In [1]:
# Libraries
import tensorflow as tf
import pandas as pd
import os
import numpy as np 

import matplotlib.pyplot as plt 
import matplotlib.image as mpimg 
import seaborn as sns
import random 

from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')
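# Note: with the global mixed_float16 policy most layers compute in float16;
# the output softmax layers below are kept in float32 (dtype='float32')
# for numerical stability.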

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Activation
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras import Sequential
from tensorflow.keras import layers

from sklearn.utils.class_weight import compute_class_weight

import warnings
warnings.filterwarnings('ignore')
In [ ]:
# Aesthetics for plots
sns.set_theme(
    context='talk',
    font_scale=0.9,
    palette = ['#0F00F5', '#3061FF', '#9AB1FF', '#CDD9FF', '#E6ECFF','#E5E5E5',
               '#B6BBCB', '#878B98','#696A6F','#292A2E'],
    style = {
        'axes.facecolor': '#FFFFFF',
         'axes.edgecolor': '#000000',
         'legend.edgecolor':'#FFFFFF',
         'axes.grid': False,
         'axes.axisbelow': 'line',
         'axes.labelcolor': 'black',
         'figure.facecolor': '#FFFFFF',
         'grid.color': '#b0b0b0',
         'grid.linestyle': '-',
         'text.color': 'black',
         'xtick.color': 'black',
         'ytick.color': 'black',
         'xtick.direction': 'out',
         'ytick.direction': 'out',
         'patch.edgecolor': '#FFFFFF',
         'patch.force_edgecolor': True,
         'image.cmap': 'viridis',
         'font.family': ['sans-serif'],
         'font.sans-serif': 'Helvetica Neue',
         'xtick.bottom': False,
         'xtick.top': False,
         'ytick.left': False,
         'ytick.right': False,
         'axes.spines.left': False,
         'axes.spines.bottom': False,
         'axes.spines.right': False,
         'axes.spines.top': False
    }
)

Step 1: Base Data (Fruit 360)¶

In [3]:
# Create the train and test dir paths

train_dir = 'data/fruits-360-original-size/train_labeled'
test_dir = 'data/fruits-360-original-size/test_labeled'


# Flow data

IMG_SIZE = (224,224)

train_data = image_dataset_from_directory(
    directory=train_dir,
    image_size=IMG_SIZE,
    batch_size=64,
    label_mode='categorical'
)

test_data = image_dataset_from_directory(
    directory=test_dir,
    image_size=IMG_SIZE,
    batch_size=64,
    label_mode='categorical'
)
Found 29222 files belonging to 20 classes.
Found 14527 files belonging to 20 classes.
In [4]:
# Visualise random images from training dataset

target_classes = random.choices(train_data.class_names, k=9)

random_images_path = []

for data_class in target_classes:
    target_dir = train_dir + '/' + data_class
    random_image = random.choice(os.listdir(target_dir))
    random_images_path.append(train_dir + '/' + data_class + '/' + random_image)


fig, axes = plt.subplots(3,3,figsize=(9,9))

for path, ax in zip(random_images_path, axes.ravel()):
    img_plot = mpimg.imread(path)
    img_class = path.split('/')[-2]
    ax.imshow(img_plot)
    ax.set_title(f'Random Image\nclass: {img_class}');
    ax.axis(False);
In [ ]:
# Create class weight dict for training, to deal with class imbalances

all_labels = []

for _, labels in train_data.unbatch():
    class_index = tf.argmax(labels).numpy()
    all_labels.append(class_index)

all_labels = np.array(all_labels)


class_names = train_data.class_names
num_classes = len(class_names)

class_weights_array = compute_class_weight(
    class_weight='balanced',
    classes=np.arange(num_classes),
    y=all_labels
)

class_weights = dict(enumerate(class_weights_array))
In [ ]:
# Create a base model

tf.random.set_seed(0)

data_augmentation = Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomContrast(0.1),
    layers.RandomZoom(.2)
    ],
    name = 'data_augmentation'
)

inputs = layers.Input(shape=IMG_SIZE+(3,), name='input_layer')
x = data_augmentation(inputs)
x = tf.keras.layers.Rescaling(1./255)(x)

x = layers.Conv2D(32, 3, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPool2D()(x)
x = layers.Dropout(0.3)(x)

x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.MaxPool2D()(x)
x = layers.Dropout(0.3)(x)

x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(
    len(train_data.class_names), 
    activation='softmax',
    name='output_layer',
    dtype='float32'
    )(x)
model_0 = tf.keras.Model(inputs, outputs, name='Model_0')
model_0.summary()
Model: "Model_0"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer)        │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ cast (Cast)                     │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ data_augmentation (Sequential)  │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ rescaling (Rescaling)           │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d (Conv2D)                 │ (None, 222, 222, 32)   │           896 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization             │ (None, 222, 222, 32)   │           128 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 220, 220, 32)   │         9,248 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 110, 110, 32)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 110, 110, 32)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 108, 108, 64)   │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_1           │ (None, 108, 108, 64)   │           256 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_3 (Conv2D)               │ (None, 106, 106, 64)   │        36,928 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 53, 53, 64)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ (None, 53, 53, 64)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_average_pooling2d        │ (None, 64)             │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ cast_1 (Cast)                   │ (None, 64)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ output_layer (Dense)            │ (None, 34)             │         2,210 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 68,162 (266.26 KB)
 Trainable params: 67,970 (265.51 KB)
 Non-trainable params: 192 (768.00 B)
In [ ]:
# Prefetch data for faster training
tf.random.set_seed(0)

AUTOTUNE = tf.data.AUTOTUNE

train_data = train_data.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
test_data = test_data.cache().prefetch(buffer_size=AUTOTUNE)
In [ ]:
# Initial setup and training
checkpoint_path = f'model_checkpoint_weights/{model_0.name}_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_path,
    save_weights_only = True,
    save_best_only = False,
    save_freq = 'epoch', #save every epoch
    verbose = 1
)

early_stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=3,
    restore_best_weights=True,
    verbose=1
)

model_0.compile(loss=CategoricalCrossentropy(),
                optimizer=Adam(),
                metrics=['accuracy'])

history_0 = model_0.fit(
    train_data,
    epochs=30,
    steps_per_epoch=int(len(train_data)*.25),
    validation_data=test_data,
    validation_steps=int(len(test_data)*.25),
    class_weight=class_weights,
    callbacks=[checkpoint_callback, early_stop_callback]
) 
In [ ]:
#Save the model
model_0.save('models/model_0.keras')
In [ ]:
#Save history 
history_0_df = pd.DataFrame(history_0.history)
In [ ]:
# Plot curves

def plot_train_val_curves(history, model_name=None):
    history_df = history.copy()
    history_df.columns = ['Training_Accuracy', 'Training_Loss', 'Validation_Accuracy', 'Validation_Loss']
    history_df['Epoch'] = history_df.index + 1
    history_melt = pd.melt(history_df, id_vars='Epoch', value_vars=history_df.columns[:-1])
    history_melt['Event'] = [i.split('_')[0] for i in history_melt['variable']]
    history_melt['Type'] = [i.split('_')[1] for i in history_melt['variable']]

    fig, axes = plt.subplots(1,2,figsize=(15,5))
    sns.lineplot(data=history_melt[history_melt['Type']=='Accuracy'], x='Epoch', y='value', hue='Event', ax=axes[0], palette='bright').set_title('Accuracy')
    sns.lineplot(data=history_melt[history_melt['Type']=='Loss'], x='Epoch', y='value', hue='Event', ax=axes[1], palette='bright').set_title('Loss')

    epoch_stopped = (np.argmax(history_df['Validation_Accuracy']) + 1)
    
    
    axes[0].legend().get_frame().set_linewidth(0)
    axes[1].legend().get_frame().set_linewidth(0)

    axes[0].set_ylim(0,1)
    axes[1].set_ylim(0,)

    max_loss = history_melt.loc[history_melt['Type']=='Loss', 'value'].max()
    axes[0].axvline(x=epoch_stopped, ymin=axes[0].get_ylim()[0], ymax=axes[0].get_ylim()[1], ls='--', color='r', lw=1)
    axes[1].axvline(x=epoch_stopped, ymin=axes[1].get_ylim()[0], ymax=max_loss, ls='--', color='r', lw=1)
    
    axes[0].text(x=epoch_stopped+.2, y=axes[0].get_ylim()[0], s='Model Restored\nby Early Stop', color='r', fontsize=10)
    axes[1].text(x=epoch_stopped+.2, y=axes[1].get_ylim()[1]*1.05, s='Model Restored\nby Early Stop', color='r', fontsize=10)
    
    axes[0].grid(axis='y', linewidth=0.3)
    axes[1].grid(axis='y', linewidth=0.3)
    axes[0].set_ylabel('')
    axes[1].set_ylabel('')
    
    ylabels = axes[0].get_yticks()
    axes[0].set_yticks(ticks=ylabels, labels=[f'{i*100:.0f}%' for i in ylabels])
    

    plt.suptitle(f'{model_name} Performance');
In [9]:
plot_train_val_curves(history_0_df, 'Model 0')
In [17]:
# Evaluate model on full val dataset
model_0_eval = model_0.evaluate(test_data, return_dict=True)
for metric, value in model_0_eval.items():
    print(f'{str(metric).title()}: {value:.3f}')
227/227 ━━━━━━━━━━━━━━━━━━━━ 41s 180ms/step - accuracy: 0.8835 - loss: 0.3026
Accuracy: 0.883
Loss: 0.306
In [10]:
# Make preds
model_0_pred = model_0.predict(test_data)
model_0_pred = np.argmax(model_0_pred, axis=1)

# Unbatch and collect true labels
y_true = []
for _, label in test_data.unbatch():
    y_true.append(label.numpy())

# Convert to a np array
y_true = np.array(y_true)

# Collect labels
y_true_argmax = np.argmax(y_true, axis=1)
227/227 ━━━━━━━━━━━━━━━━━━━━ 44s 190ms/step
In [ ]:
# Make confusion matrix to assess overall performance
import itertools
from sklearn.metrics import confusion_matrix

def make_confusion_matrix(y_true, y_pred, classes=None, figsize=(10, 10), text_size=10, norm=False, savefig=False, title='Confusion Matrix'): 
    # Create the confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    cm_norm = cm.astype("float") / cm.sum(axis=1)[:, np.newaxis] # normalize it
    n_classes = cm.shape[0] # find the number of classes we're dealing with

    # Plot the figure and make it pretty
    fig, ax = plt.subplots(figsize=figsize)
    cax = ax.matshow(cm, cmap=plt.cm.Blues) # colors will represent how 'correct' a class is, darker == better
    #fig.colorbar(cax)

    # Are there a list of classes?
    if classes:
      labels = classes
    else:
      labels = np.arange(cm.shape[0])
    
    
    # Label the axes
    ax.set(title=title,
          xlabel="Predicted label",
          ylabel="True label",
          xticks=np.arange(n_classes), # create enough axis slots for each class
          yticks=np.arange(n_classes), 
          xticklabels=labels, # axes will be labeled with class names (if they exist) or ints
          yticklabels=labels)
    
    # Make x-axis labels appear on bottom
    ax.xaxis.set_label_position("bottom")
    ax.xaxis.tick_bottom()
    ax.set_xticklabels(labels,rotation=90)

    # Set the threshold for different colors
    threshold = (cm.max() + cm.min()) / 2.

    # Plot the text on each cell
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
      if norm:
          plt.text(j, i, f"{cm[i, j]} ({cm_norm[i, j]*100:.1f}%)",
                  horizontalalignment="center",
                  color="white" if cm[i, j] > threshold else "black",
                  size=text_size)
      else:
        plt.text(j, i, f"{cm[i, j]}",
                horizontalalignment="center",
                color="white" if cm[i, j] > threshold else "black",
                size=text_size)

    # Save the figure to the current working directory
    if savefig:
      fig.savefig("confusion_matrix.png")
In [ ]:
make_confusion_matrix(y_true_argmax, model_0_pred, classes=class_names, title='Model 0 Confusion Matrix')
In [ ]:
# See how the model is performing with images and predictions

def load_and_prep_image(filename, img_shape=224, scale=True):
    img = tf.io.read_file(filename) # read the img

    img = tf.io.decode_image(img, channels=3) #decode img into tensor

    img = tf.image.resize(img, [img_shape, img_shape]) #resize the image

    #Scale? Yes/No
    if scale:
        return img/255.
    else:
        return img 

def plot_model_images(model):
    plt.figure(figsize=(17,10))
    for i in range(9):
        #Choose random imgs
        class_name = random.choice(class_names)
        filename = random.choice(os.listdir(test_dir + '/' + class_name))
        filepath = test_dir + '/' + class_name + '/' + filename
        
        #load the imgs and make preds
        img = load_and_prep_image(filepath, scale=False) #load img
        img_expanded = tf.expand_dims(img, axis=0) #Expand dims to match the model's expected shape
        pred_prob = model.predict(img_expanded) #get probs array
        pred_class = class_names[pred_prob.argmax()] #get highest pred prob and attach to class
        
        plt.subplot(3,3,i+1)
        plt.imshow(img/255.)
        if class_name == pred_class:
            title_color = 'g'
        else:
            title_color = 'r'
        plt.title(f'Actual: {class_name}, pred: {pred_class}\nprob: {pred_prob.max():.2f}', c=title_color)
        plt.axis(False)
In [ ]:
plot_model_images(model_0)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 64ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 28ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step
In [8]:
# Train the model some more with an LR scheduler to see if I can improve it

model_1 = tf.keras.models.clone_model(model_0)
model_1.set_weights(model_0.get_weights())
model_1.name = 'Model_1'
model_1.summary()
Model: "Model_1"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer)        │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ cast_2 (Cast)                   │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ data_augmentation (Sequential)  │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ rescaling (Rescaling)           │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d (Conv2D)                 │ (None, 222, 222, 32)   │           896 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization             │ (None, 222, 222, 32)   │           128 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 220, 220, 32)   │         9,248 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 110, 110, 32)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 110, 110, 32)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 108, 108, 64)   │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_1           │ (None, 108, 108, 64)   │           256 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_3 (Conv2D)               │ (None, 106, 106, 64)   │        36,928 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 53, 53, 64)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ (None, 53, 53, 64)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_average_pooling2d        │ (None, 64)             │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ cast_3 (Cast)                   │ (None, 64)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ output_layer (Dense)            │ (None, 34)             │         2,210 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 68,162 (266.26 KB)
 Trainable params: 67,970 (265.51 KB)
 Non-trainable params: 192 (768.00 B)
In [ ]:
checkpoint_path = f'model_checkpoint_weights/{model_1.name}_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_path,
    save_weights_only = True,
    save_best_only = False,
    save_freq = 'epoch', #save every epoch
    verbose = 1
)

early_stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', 
    factor=0.5, 
    patience=3, 
    min_lr=1e-6, 
    verbose=1
)

model_1.compile(loss=CategoricalCrossentropy(),
                optimizer=Adam(),
                metrics=['accuracy'])

history_1 = model_1.fit(
    train_data,
    epochs=30,
    steps_per_epoch=int(len(train_data)*.25),
    validation_data=test_data,
    validation_steps=int(len(test_data)*.25),
    class_weight=class_weights,
    callbacks=[checkpoint_callback, early_stop_callback, reduce_lr]
) 
In [ ]:
#Save the model
model_1.save('models/model_1.keras')
In [ ]:
#Save history 
history_1_df = pd.DataFrame(history_1.history)
In [ ]:
#Evaluate new model accuracy
model_1_eval = model_1.evaluate(test_data, return_dict=True)
for metric, value in model_1_eval.items():
    print(f'{str(metric).title()}: {value:.3f}')
227/227 ━━━━━━━━━━━━━━━━━━━━ 44s 192ms/step - accuracy: 0.9560 - loss: 0.1373
Accuracy: 0.956
Loss: 0.137
In [76]:
# Plot curves
plot_train_val_curves(history_1_df.iloc[:, :-1], 'Model 1')
In [ ]:
# Make preds and plot confusion matrix
model_1_pred = model_1.predict(test_data)
model_1_pred = np.argmax(model_1_pred, axis=1)

make_confusion_matrix(y_true_argmax, model_1_pred, classes=class_names, title='Model 1 Confusion Matrix')
227/227 ━━━━━━━━━━━━━━━━━━━━ 43s 187ms/step
In [ ]:
# Let's see how it is performing with images and predictions
plot_model_images(model_1)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 58ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 21ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
In [33]:
# Test performance with internet pics

def pred_and_plot(model=None, final_test_dir=None, class_names=None):
  plt.figure(figsize=(10,10))
  # Choose 9 random images
  filepaths = [final_test_dir + '/' + fname for fname in os.listdir(final_test_dir)]
  imgs = random.choices(filepaths, k=9)
  for i in range(9):
    img_path = imgs[i]
    img = load_and_prep_image(img_path, scale=False)
    img_expanded = tf.expand_dims(img, axis=0) 
    pred_prob = model.predict(img_expanded) 
    pred_class = class_names[pred_prob.argmax()] 

    plt.subplot(3,3,i+1)
    plt.imshow(img/255.)
    plt.title(f'pred: {pred_class}\nprob: {pred_prob.max():.2f}')
    plt.axis(False)
  plt.subplots_adjust(hspace=.3, wspace=.01)
In [ ]:
pred_and_plot(model_1, final_test_dir='data/final_test_img', class_names=class_names)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 65ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 25ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 20ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 22ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 19ms/step
In [ ]:
# Bad performance with complex images, as expected. Let's move on to using more complex images for training

Step 2: Real-world Data (GroceryStore)¶

In [ ]:
# Now to train the same model as before, but with the new data, and see how it goes

# Create the train and test dir paths

train_dir = 'data/GroceryStoreDataset/dataset/train_labeled'
test_dir = 'data/GroceryStoreDataset/dataset/test_labeled'

IMG_SIZE = (224,224)

# Flow data

train_data = image_dataset_from_directory(
    directory=train_dir,
    image_size=IMG_SIZE,
    batch_size=64,
    label_mode='categorical'
)

test_data = image_dataset_from_directory(
    directory=test_dir,
    image_size=IMG_SIZE,
    batch_size=64,
    label_mode='categorical'
)
Found 1776 files belonging to 34 classes.
Found 1704 files belonging to 34 classes.
In [4]:
# Visualise random images from training dataset

target_classes = random.choices(train_data.class_names, k=9)

random_images_path = []

for data_class in target_classes:
    target_dir = train_dir + '/' + data_class
    random_image = random.choice(os.listdir(target_dir))
    random_images_path.append(train_dir + '/' + data_class + '/' + random_image)


fig, axes = plt.subplots(3,3,figsize=(9,9))

for path, ax in zip(random_images_path, axes.ravel()):
    img_plot = mpimg.imread(path)
    img_class = path.split('/')[-2]
    ax.imshow(img_plot)
    ax.set_title(f'Random Image\nclass: {img_class}');
    ax.axis(False);
plt.subplots_adjust(hspace=.3)
In [ ]:
# Create class weight dict for training to handle class imbalance

all_labels = []

for _, labels in train_data.unbatch():
    class_index = tf.argmax(labels).numpy()
    all_labels.append(class_index)

all_labels = np.array(all_labels)


class_names = train_data.class_names
num_classes = len(class_names)

class_weights_array = compute_class_weight(
    class_weight='balanced',
    classes=np.arange(num_classes),
    y=all_labels
)

class_weights = dict(enumerate(class_weights_array))
In [ ]:
# Reuse the earlier model as a feature extractor and attach a new head, since the data and number of classes are different
feature_extractor = tf.keras.Model(
    inputs=model_1.input,
    outputs=model_1.layers[-2].output 
)

feature_extractor.trainable = False

outputs = layers.Dense(
    len(class_names), activation='softmax', name='output_layer', dtype='float32'
)(feature_extractor.output)

model_2 = tf.keras.Model(feature_extractor.input, outputs, name='Model_2')

model_2.summary()
Model: "Model_2"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer)        │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ cast_2 (Cast)                   │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ data_augmentation (Sequential)  │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ rescaling (Rescaling)           │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d (Conv2D)                 │ (None, 222, 222, 32)   │           896 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization             │ (None, 222, 222, 32)   │           128 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 220, 220, 32)   │         9,248 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 110, 110, 32)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 110, 110, 32)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 108, 108, 64)   │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_1           │ (None, 108, 108, 64)   │           256 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_3 (Conv2D)               │ (None, 106, 106, 64)   │        36,928 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 53, 53, 64)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ (None, 53, 53, 64)     │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_average_pooling2d        │ (None, 64)             │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ cast_4 (Cast)                   │ (None, 64)             │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ output_layer (Dense)            │ (None, 34)             │         2,210 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 68,162 (266.26 KB)
 Trainable params: 2,210 (8.63 KB)
 Non-trainable params: 65,952 (257.62 KB)
In [10]:
# Repeat the same cache/shuffle/prefetch setup used for model 0
tf.random.set_seed(0)

AUTOTUNE = tf.data.AUTOTUNE

train_data = train_data.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
test_data = test_data.cache().prefetch(buffer_size=AUTOTUNE)
In [ ]:
checkpoint_path = f'model_checkpoint_weights/{model_2.name}_checkpoint.weights.h5'

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_path,
    save_weights_only = True,
    save_best_only = True,
    monitor='val_accuracy',
    save_freq = 'epoch', #save every epoch
    verbose = 1
)

early_stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

model_2.compile(loss=CategoricalCrossentropy(),
                optimizer=Adam(),
                metrics=['accuracy'])


history_2 = model_2.fit(
    train_data,
    epochs=100,
    steps_per_epoch=int(len(train_data)*.25),
    validation_data=test_data,
    validation_steps=int(len(test_data)*.25),
    class_weight=class_weights,
    callbacks=[checkpoint_callback,early_stop_callback]
)
In [ ]:
#Save the model
model_2.save('models/model_2.keras')
In [ ]:
#Save history 
history_2_df = pd.DataFrame(history_2.history)
In [15]:
#Plot curves
plot_train_val_curves(history_2_df, 'Model 2')
In [ ]:
#Fine tune 

feature_extractor.trainable=True
for layer in feature_extractor.layers[:-8]:
    layer.trainable = False

checkpoint_path = f'model_checkpoint_weights/{model_2.name}_finetuned_checkpoint.weights.h5'
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_path,
    monitor='val_accuracy',
    save_weights_only = True,
    save_best_only = True,
    save_freq = 'epoch', #save every epoch
    verbose = 1
)

model_2.compile(loss=CategoricalCrossentropy(),
                optimizer=Adam(learning_rate=0.0001),
                metrics=['accuracy'])

early_stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    verbose=1
)

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_accuracy', 
    factor=0.5, 
    patience=5, 
    min_lr=1e-7, 
    verbose=1
)

history_2_finetuned = model_2.fit(
    train_data,
    epochs=200,
    steps_per_epoch=int(len(train_data)*.25),
    validation_data=test_data,
    validation_steps=int(len(test_data)*.25),
    class_weight=class_weights,
    callbacks=[checkpoint_callback, early_stop_callback, reduce_lr]
)
In [ ]:
#Save the model
model_2.save('models/model_2_finetuned.keras')

#Save history 
history_2_finetuned_df = pd.DataFrame(history_2_finetuned.history)


#Plot curves
plot_train_val_curves(history_2_finetuned_df.iloc[:,:4], 'Model 2 Fine-tuned')
In [ ]:
# Pretty bad performance, probably because the model is not deep enough and the images are messy. Let's try a deeper model
In [ ]:
inputs = layers.Input(shape=IMG_SIZE+(3,), name='input_layer')

data_augmentation = Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomContrast(0.1),
    layers.RandomZoom(.2)
    ],
    name = 'data_augmentation'
)


x = data_augmentation(inputs)
x = tf.keras.layers.Rescaling(1./255)(x)

x = layers.Conv2D(64, 3, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(64, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.MaxPooling2D()(x)

x = layers.Conv2D(128, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(128, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.3)(x)

x = layers.Conv2D(256, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(256, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.4)(x)


x = layers.Conv2D(512, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.Conv2D(512, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.5)(x)

x = layers.Conv2D(512, 3, padding='same', activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001))(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D()(x)
x = layers.Dropout(0.5)(x)

x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(len(class_names), activation='softmax', dtype='float32')(x)

model_3 = tf.keras.Model(inputs, outputs, name="Model_3")

model_3.summary()
Model: "Model_3"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer)        │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ cast (Cast)                     │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ data_augmentation (Sequential)  │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ rescaling (Rescaling)           │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d (Conv2D)                 │ (None, 222, 222, 64)   │         1,792 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization             │ (None, 222, 222, 64)   │           256 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 222, 222, 64)   │        36,928 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d (MaxPooling2D)    │ (None, 111, 111, 64)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 111, 111, 128)  │        73,856 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_1           │ (None, 111, 111, 128)  │           512 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_3 (Conv2D)               │ (None, 111, 111, 128)  │       147,584 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_1 (MaxPooling2D)  │ (None, 55, 55, 128)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 55, 55, 128)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_4 (Conv2D)               │ (None, 55, 55, 256)    │       295,168 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_2           │ (None, 55, 55, 256)    │         1,024 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_5 (Conv2D)               │ (None, 55, 55, 256)    │       590,080 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_2 (MaxPooling2D)  │ (None, 27, 27, 256)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_1 (Dropout)             │ (None, 27, 27, 256)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_6 (Conv2D)               │ (None, 27, 27, 512)    │     1,180,160 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_3           │ (None, 27, 27, 512)    │         2,048 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_7 (Conv2D)               │ (None, 27, 27, 512)    │     2,359,808 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_3 (MaxPooling2D)  │ (None, 13, 13, 512)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_2 (Dropout)             │ (None, 13, 13, 512)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_8 (Conv2D)               │ (None, 13, 13, 512)    │     2,359,808 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ batch_normalization_4           │ (None, 13, 13, 512)    │         2,048 │
│ (BatchNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ max_pooling2d_4 (MaxPooling2D)  │ (None, 6, 6, 512)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_3 (Dropout)             │ (None, 6, 6, 512)      │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_average_pooling2d        │ (None, 512)            │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 128)            │        65,664 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_4 (Dropout)             │ (None, 128)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense_1 (Dense)                 │ (None, 34)             │         4,386 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 7,121,122 (27.16 MB)
 Trainable params: 7,118,178 (27.15 MB)
 Non-trainable params: 2,944 (11.50 KB)
In [ ]:
checkpoint_path = f'model_checkpoint_weights/{model_3.name}_checkpoint.weights.h5'

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_path,
    save_weights_only = True,
    save_best_only = True,
    monitor='val_accuracy',
    save_freq = 'epoch', #save every epoch
    verbose = 1
)

early_stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
    verbose=1
)

model_3.compile(loss=CategoricalCrossentropy(),
                optimizer=Adam(),
                metrics=['accuracy'])


history_3 = model_3.fit(
    train_data,
    epochs=100,
    steps_per_epoch=int(len(train_data)*.25),
    validation_data=test_data,
    validation_steps=int(len(test_data)*.25),
    class_weight=class_weights,
    callbacks=[checkpoint_callback,early_stop_callback]
)
In [ ]:
#Save the model
model_3.save('models/model_3.keras')



#Save history 
history_3_df = pd.DataFrame(history_3.history)


#Plot curves
plot_train_val_curves(history_3_df.iloc[:,:4], 'Model 3')
In [13]:
model_3_eval = model_3.evaluate(test_data, return_dict=True)
for metric, value in model_3_eval.items():
    print(f'{str(metric).title()}: {value:.3f}')
27/27 ━━━━━━━━━━━━━━━━━━━━ 20s 731ms/step - accuracy: 0.0879 - loss: 6.4976
Accuracy: 0.085
Loss: 6.520
In [ ]:
# Accuracy and performance are very low. This is not worth fine-tuning. Let's try a complex EfficientNet model instead.
In [18]:
# Transfer learning from EffNet

base_model = tf.keras.applications.efficientnet_v2.EfficientNetV2S(include_top=False, input_shape=IMG_SIZE + (3,), weights='imagenet')
base_model.trainable = False

data_augmentation = tf.keras.Sequential([
    layers.RandomFlip('horizontal'),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomTranslation(0.1, 0.1),
    layers.RandomContrast(0.1),
], name='data_augmentation')

# Set up model architecture with trainable top layers
inputs = layers.Input(shape=IMG_SIZE+(3,), name='input_layer')
x = data_augmentation(inputs)
# preprocess_input belongs before the base model; for EfficientNetV2 it is a
# pass-through anyway, since input rescaling is built into the network itself
x = tf.keras.applications.efficientnet_v2.preprocess_input(x)
x = base_model(x, training=False)
x = layers.GlobalAveragePooling2D(name='global_avg_pool')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.0001))(x)
outputs = layers.Dense(
    len(class_names), 
    activation='softmax',
    name='output_layer',
    dtype='float32'
    )(x)
model_4 = tf.keras.Model(inputs,outputs, name='Model_4')

model_4.summary()
Model: "Model_4"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer)        │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ cast_9 (Cast)                   │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ data_augmentation (Sequential)  │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ efficientnetv2-s (Functional)   │ (None, 7, 7, 1280)     │    20,331,360 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_avg_pool                 │ (None, 1280)           │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout_2 (Dropout)             │ (None, 1280)           │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 128)            │       163,968 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ cast_10 (Cast)                  │ (None, 128)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ output_layer (Dense)            │ (None, 34)             │         4,386 │
└─────────────────────────────────┴────────────────────────┴───────────────┘
 Total params: 20,499,714 (78.20 MB)
 Trainable params: 168,354 (657.63 KB)
 Non-trainable params: 20,331,360 (77.56 MB)
In [ ]:
checkpoint_path = f'model_checkpoint_weights/{model_4.name}_checkpoint.weights.h5'

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_path,
    save_weights_only = True,
    save_best_only = True,
    monitor='val_accuracy',
    save_freq = 'epoch', 
    verbose = 1
)

early_stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=6,
    restore_best_weights=True,
    verbose=1
)

lr_schedule = tf.keras.optimizers.schedules.CosineDecay(
    initial_learning_rate=0.001,
    decay_steps=10000
)
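# Note: decay_steps=10000 is far more optimizer steps than this run performs
# (~7 steps/epoch over at most 30 epochs), so the LR only traces the very
# start of the cosine curve here.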

model_4.compile(loss=CategoricalCrossentropy(label_smoothing=0.1),
                optimizer=Adam(learning_rate=lr_schedule),
                metrics=['accuracy'])


history_4 = model_4.fit(
    train_data,
    epochs=30,
    steps_per_epoch=int(len(train_data)*.25),
    validation_data=test_data,
    validation_steps=int(len(test_data)*.25),
    class_weight=class_weights,
    callbacks=[
        checkpoint_callback
        , early_stop_callback
        ]
)
In [ ]:
#Save the model
model_4.save('models/model_4.keras')



#Save history 
history_4_df = pd.DataFrame(history_4.history)
In [20]:
#Plot curves
plot_train_val_curves(history_4_df.iloc[:,:4], 'Model 4')
In [36]:
model_4_eval = model_4.evaluate(test_data, return_dict=True)
for metric, value in model_4_eval.items():
    print(f'{str(metric).title()}: {value:.3f}')
27/27 ━━━━━━━━━━━━━━━━━━━━ 100s 3s/step - accuracy: 0.6637 - loss: 1.7993
Accuracy: 0.668
Loss: 1.762
In [ ]:
#That's much better. Now this can be fine-tuned.
In [ ]:
# Fine tune
base_model.trainable=True

# Unfreeze the top N layers (fall back to a full fine-tune if accuracy doesn't improve)
n_layers = 150
for layer in base_model.layers[:-n_layers]:
    layer.trainable = False

checkpoint_path = f'model_checkpoint_weights/{model_4.name}_finetuned_checkpoint.weights.h5'

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_path,
    save_weights_only = True,
    save_best_only = True,
    monitor='val_accuracy',
    mode='max', 
    verbose = 1
)

early_stop_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True,
    mode='max',
    verbose=1
)

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', 
    factor=0.5, 
    patience=4, 
    min_lr=1e-7, 
    verbose=1
)

model_4.compile(loss=CategoricalCrossentropy(label_smoothing=0.1),
                optimizer=Adam(learning_rate=1e-4),
                metrics=['accuracy'])


history_4_finetuned = model_4.fit(
    train_data,
    epochs=100,
    steps_per_epoch=int(len(train_data)),
    validation_data=test_data,
    validation_steps=int(len(test_data)),
    class_weight=class_weights,
    callbacks=[checkpoint_callback
               , early_stop_callback
               , reduce_lr 
               ]
)
In [ ]:
#Save/load the model
model_4.save('models/model_4_finetuned.keras')

model_4_finetuned = tf.keras.models.load_model('models/model_4_finetuned.keras')



#Load history (previously saved to CSV) 
history_4_finetuned_df = pd.read_csv('model_history_records/history_4_finetuned.csv')
In [22]:
#Plot curves
plot_train_val_curves(history_4_finetuned_df.iloc[:,:4], 'Model 4 FT')
In [24]:
# Unbatch and collect true labels
y_true = []
for _, label in test_data.unbatch():
    y_true.append(label.numpy())

# Convert to a np array
y_true = np.array(y_true)

# Collect labels
y_true_argmax = np.argmax(y_true, axis=1)
In [25]:
# Make preds and plot confusion matrix
model_4_pred = model_4_finetuned.predict(test_data)
model_4_pred = np.argmax(model_4_pred, axis=1)
27/27 ━━━━━━━━━━━━━━━━━━━━ 49s 2s/step
In [28]:
make_confusion_matrix(y_true_argmax, model_4_pred, classes=class_names, title='Model 4 FT Confusion Matrix')
In [31]:
plot_model_images(model_4)
1/1 ━━━━━━━━━━━━━━━━━━━━ 9s 9s/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 552ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 1s/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 232ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 216ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 833ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 224ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 160ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 161ms/step
In [ ]:
# Much better performance on messy images
In [48]:
pred_and_plot(model_4, final_test_dir='data/final_test_img', class_names=class_names)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 360ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 159ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 159ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 156ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 157ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 156ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 153ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 156ms/step
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 151ms/step
In [ ]:
# Also much better performance on the noisy images from the internet!
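
As a closing sketch, this is how the saved fine-tuned model could be loaded and used to classify a single photo, reusing the load_and_prep_image helper from earlier (the image filename below is a hypothetical placeholder):

In [ ]:
# Minimal single-image inference sketch (the image path is a placeholder)
loaded_model = tf.keras.models.load_model('models/model_4_finetuned.keras')

img = load_and_prep_image('data/final_test_img/my_produce_photo.jpg', scale=False)
pred_prob = loaded_model.predict(tf.expand_dims(img, axis=0))

print(f'Predicted: {class_names[pred_prob.argmax()]} (prob: {pred_prob.max():.2f})')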

Thanks for reading!