import os
import shutil
# Normalise folder names below the working directory: lower-case them and
# replace spaces with underscores.
root_dir = '.'
# Walk bottom-up (topdown=False) so the contents of a directory are handled
# before the directory itself is renamed, keeping the walked paths valid.
for parent_path, child_dirs, _child_files in os.walk(root_dir, topdown=False):
    for folder in child_dirs:
        normalised = folder.replace(' ', '_').lower()
        if normalised == folder:
            # Name is already in the target form; nothing to rename.
            continue
        os.rename(
            os.path.join(parent_path, folder),
            os.path.join(parent_path, normalised),
        )
def organise_fruits360(parent='train'):
    """Regroup the class folders of one dataset split by category.

    Every directory directly under ``./{parent}/`` is treated as a class
    folder. The text before the first underscore of its name is the category
    (``apple_red`` -> ``apple``). Each regular file inside the class folder
    is copied to ``./{parent}_labeled/<category>/`` and its name is prefixed
    with the class-folder name, so files from different class folders that
    share a category cannot overwrite each other.

    Args:
        parent: Name or relative path of the split directory, resolved
            against the current working directory.

    Raises:
        FileNotFoundError: If ``./{parent}/`` does not exist.
    """
    root = f'./{parent}/'
    new_root = f'./{parent}_labeled/'
    os.makedirs(new_root, exist_ok=True)

    for class_name in os.listdir(root):
        class_dir = os.path.join(root, class_name)
        if not os.path.isdir(class_dir):
            continue

        # Take the category from the folder name itself rather than from a
        # fixed position in the joined path string, so a nested ``parent``
        # such as 'data/train' resolves to the right category.
        category = class_name.split('_')[0]
        category_dir = os.path.join(new_root, category)
        os.makedirs(category_dir, exist_ok=True)

        for file_name in os.listdir(class_dir):
            old_path = os.path.join(class_dir, file_name)
            # copy2 only handles files; skip anything else (e.g. nested
            # directories) instead of aborting the whole run.
            if not os.path.isfile(old_path):
                continue
            new_path = os.path.join(category_dir, f'{class_name}_{file_name}')
            shutil.copy2(old_path, new_path)
# Regroup each of the three dataset splits in turn.
for _split in ('train', 'test', 'val'):
    organise_fruits360(_split)
import pandas as pd
# Build id -> name lookup tables from classes.csv. Keys are taken from the
# second / fourth column, values from the first / third column.
classes_df = pd.read_csv('classes.csv')
fine_classes_dict = dict(zip(classes_df.iloc[:, 1], classes_df.iloc[:, 0]))
coarse_classes_dict = dict(zip(classes_df.iloc[:, 3], classes_df.iloc[:, 2]))


def _read_split(listing_path):
    """Load one header-less split listing and attach the label columns."""
    frame = pd.read_csv(listing_path, header=None)
    frame.columns = ['filename', 'fine_class', 'coarse_class']
    frame['fine_class_label'] = frame['fine_class'].map(fine_classes_dict)
    frame['coarse_class_label'] = frame['coarse_class'].map(coarse_classes_dict)
    return frame


# Read file info for every split
train_files = _read_split('train.txt')
test_files = _read_split('test.txt')
val_files = _read_split('val.txt')

# Define classes to exclude: every 'Type' (third path component) that appears
# under the 'Packages' category (second path component) in the training list.
_train_path_parts = [name.split('/') for name in train_files['filename']]
category_df = pd.DataFrame({
    'Category': [parts[1] for parts in _train_path_parts],
    'Type': [parts[2] for parts in _train_path_parts],
})
_is_package = category_df['Category'] == 'Packages'
labels_to_exclude = category_df.loc[_is_package, 'Type'].unique()
#Create function to organise files
def organise_files(dataset='train', files_info = train_files):
    """Copy the files of one split into per-label folders.

    Creates ``{dataset}_labeled/`` with one lower-cased sub-folder per coarse
    class label, then copies every listed file that exists on disk into the
    folder of its label. Labels found in the module-level
    ``labels_to_exclude`` are skipped entirely.

    Args:
        dataset: Split name; used only to build the output folder name.
        files_info: DataFrame with at least the columns ``filename`` (path
            of the source file) and ``coarse_class_label``.
    """
    # Create new dir for appropriately labeled data
    new_dir = f'{dataset}_labeled'
    os.makedirs(new_dir, exist_ok=True)

    excluded = set(labels_to_exclude)

    #Create subdirectories for each label
    for label in files_info['coarse_class_label'].unique():
        if label not in excluded:
            os.makedirs(os.path.join(new_dir, label.lower()), exist_ok=True)

    # Copy files into correct label directories
    for file_path, coarse_label in zip(files_info['filename'],
                                       files_info['coarse_class_label']):
        if coarse_label in excluded:
            continue
        # Use the same lower-cased name the folder was created with. With the
        # un-lowered label the target is not an existing directory on a
        # case-sensitive filesystem, and copy2 would write one plain file
        # named after the label, overwritten by every row.
        target_dir = os.path.join(new_dir, coarse_label.lower())
        # Listed files that are missing on disk are skipped on purpose.
        if os.path.isfile(file_path):
            shutil.copy2(file_path, target_dir)
# Organise every split using its own file listing.
for _split, _listing in (
    ('train', train_files),
    ('test', test_files),
    ('val', val_files),
):
    organise_files(_split, _listing)