Prepare Fruits-360 Data

In [ ]:
import os
import shutil
In [ ]:
# Streamline folder names: lower-case every directory name below the
# working directory and replace spaces with underscores.
root_dir = '.'

# Walk bottom-up so the contents of a directory are visited before the
# directory itself is renamed by its parent's iteration.
for parent_dir, child_dirs, _ in os.walk(root_dir, topdown=False):
    for folder in child_dirs:
        normalised = folder.lower().replace(' ', '_')

        # Already in the target form: nothing to rename
        if normalised == folder:
            continue

        os.rename(
            os.path.join(parent_dir, folder),
            os.path.join(parent_dir, normalised),
        )
In [ ]:
def organise_fruits360(parent='train'):
    """Regroup the class folders of `./{parent}/` by category.

    Every sub-directory of `./{parent}/` is treated as one class folder.
    Its category is the part of the folder name before the first
    underscore (e.g. `apple_red_1` -> `apple`). Each file found directly
    inside a class folder is copied to
    `./{parent}_labeled/<category>/<class folder>_<file name>`, so files
    from different class folders that share a name do not overwrite each
    other.

    Parameters
    ----------
    parent : str
        Path of the split directory, relative to the current working
        directory. May contain several components (e.g. 'data/train').

    Notes
    -----
    Entries of `./{parent}/` that are not directories, and entries of a
    class folder that are not regular files, are skipped.
    """
    root = f'./{parent}/'
    new_root = f'./{parent}_labeled/'
    os.makedirs(new_root, exist_ok=True)

    for class_dir in os.listdir(root):
        subdir = os.path.join(root, class_dir)
        if not os.path.isdir(subdir):
            continue

        # Derive the category from the folder name itself rather than from
        # a fixed position in the joined path, so `parent` may be nested.
        category = class_dir.split('_')[0]
        category_dir = os.path.join(new_root, category)
        os.makedirs(category_dir, exist_ok=True)

        for file_name in os.listdir(subdir):
            old_path = os.path.join(subdir, file_name)

            # copy2 only handles regular files; skip nested directories
            if not os.path.isfile(old_path):
                continue

            # Prefix with the class folder name to keep file names unique
            new_path = os.path.join(category_dir, f'{class_dir}_{file_name}')
            shutil.copy2(old_path, new_path)
In [ ]:
# Regroup ./train/ into ./train_labeled/
organise_fruits360(parent='train')
In [ ]:
# Regroup ./test/ into ./test_labeled/
organise_fruits360(parent='test')
In [ ]:
# Regroup ./val/ into ./val_labeled/
organise_fruits360(parent='val')

Prepare GroceryStore Data

In [ ]:
import pandas as pd
In [ ]:
# Load the class table and build lookup dicts for both label levels.
classes_df = pd.read_csv('classes.csv')

# Columns are addressed by position: column 1 supplies the keys and
# column 0 the values of the fine lookup; column 3 supplies the keys and
# column 2 the values of the coarse lookup.
fine_keys, fine_labels = classes_df.iloc[:, 1], classes_df.iloc[:, 0]
coarse_keys, coarse_labels = classes_df.iloc[:, 3], classes_df.iloc[:, 2]

fine_classes_dict = dict(zip(fine_keys, fine_labels))
coarse_classes_dict = dict(zip(coarse_keys, coarse_labels))
In [ ]:
# Read file info for each split
def _load_split_listing(listing_path):
    """Read one header-less split listing and attach its class labels.

    The three columns of the file are named 'filename', 'fine_class' and
    'coarse_class'. Two label columns are then added by mapping the class
    columns through `fine_classes_dict` and `coarse_classes_dict`.
    """
    listing = pd.read_csv(listing_path, header=None)
    listing.columns = ['filename', 'fine_class', 'coarse_class']
    listing['fine_class_label'] = listing['fine_class'].map(fine_classes_dict)
    listing['coarse_class_label'] = listing['coarse_class'].map(coarse_classes_dict)
    return listing


train_files = _load_split_listing('train.txt')
test_files = _load_split_listing('test.txt')
val_files = _load_split_listing('val.txt')
In [ ]:
# Define classes to exclude: every 'Type' that appears under the
# 'Packages' category in the training listing.
# Path components 1 and 2 of each filename are taken as Category and Type.
path_parts = [name.split('/') for name in train_files['filename']]

category_df = pd.DataFrame({
    'Category': [parts[1] for parts in path_parts],
    'Type': [parts[2] for parts in path_parts],
})

is_package = category_df['Category'] == 'Packages'
labels_to_exclude = category_df.loc[is_package, 'Type'].unique()
In [ ]:
#Create function to organise files
def organise_files(dataset='train', files_info = train_files):
    """Copy the listed files into `{dataset}_labeled/<coarse label>/`.

    One sub-directory per coarse class label is created (lower-cased)
    under `{dataset}_labeled`, and every file listed in `files_info` is
    copied into the sub-directory of its coarse label.

    Parameters
    ----------
    dataset : str
        Split name; the output directory is `{dataset}_labeled`.
    files_info : pandas.DataFrame
        Listing with at least the columns 'filename' and
        'coarse_class_label'. The default is the module-level
        `train_files` as it exists when this function is defined.

    Notes
    -----
    Rows whose coarse label is in the module-level `labels_to_exclude`
    are skipped, as are rows whose 'filename' does not point to an
    existing file.
    """
    # Create new dir for appropriately labeled data
    new_dir = f'{dataset}_labeled'
    os.makedirs(new_dir, exist_ok=True)

    # Create subdirectories for each retained label
    for label in files_info['coarse_class_label'].unique():
        if label not in labels_to_exclude:
            os.makedirs(os.path.join(new_dir, label.lower()), exist_ok=True)

    # Copy files into correct label directories
    for _, row in files_info.iterrows():
        file_path = row['filename']
        coarse_label = row['coarse_class_label']

        if coarse_label in labels_to_exclude:
            continue

        # Must be lower-cased exactly like the directory created above.
        # Otherwise, on a case-sensitive filesystem, the target is not an
        # existing directory and copy2 writes a single file named after
        # the label, overwritten by every subsequent copy.
        target_dir = os.path.join(new_dir, coarse_label.lower())
        if os.path.isfile(file_path):
            shutil.copy2(file_path, target_dir)
In [ ]:
# Copy the training listing into train_labeled/
organise_files(dataset='train', files_info=train_files)
In [ ]:
# Copy the test listing into test_labeled/
organise_files(dataset='test', files_info=test_files)
In [ ]:
# Copy the validation listing into val_labeled/
organise_files(dataset='val', files_info=val_files)