diff --git a/run_exp.py b/run_exp.py
index b0399a699d489005483611f94c50929190c39708..16c8e317252471133d8b790f929ce0654863b427 100644
--- a/run_exp.py
+++ b/run_exp.py
@@ -4,31 +4,34 @@ import subprocess
 # Path to your CSV file
 csv_file = "exp_configs.csv"
 
-# Read the second line from the CSV file
+# Open and read the CSV file
 with open(csv_file, newline='') as csvfile:
     reader = csv.reader(csvfile)
 
-    # Skip the header (if any) and the first row
-    next(reader) # Skipping the header
-    row = next(reader) # Reading the second row
+    # Skip the header (first row)
+    next(reader)
 
-    # Assigning CSV values to variables
-    exp_type, dataset, nn_model, heterogeneity_type, num_clients, num_samples_by_label, num_clusters, centralized_epochs, federated_rounds, seed = row
+    # Iterate over each row (experiment configuration) in the CSV file
+    for row in reader:
+        # Assigning CSV values to variables
+        exp_type, dataset, nn_model, heterogeneity_type, num_clients, num_samples_by_label, num_clusters, centralized_epochs, federated_rounds, seed = row
 
-    # Building the command
-    command = [
-        "python", "driver.py",
-        "--exp_type", exp_type,
-        "--dataset", dataset,
+        # Building the command to run the driver.py script with the corresponding arguments
+        command = [
+            "python", "driver.py",
+            "--exp_type", exp_type,
+            "--dataset", dataset,
+            "--nn_model", nn_model,
+            "--heterogeneity_type", heterogeneity_type,
+            "--num_clients", num_clients,
+            "--num_samples_by_label", num_samples_by_label,
+            "--num_clusters", num_clusters,
+            "--centralized_epochs", centralized_epochs,
+            "--federated_rounds", federated_rounds,
+            "--seed", seed]
 
-        "--nn_model", nn_model,
-        "--heterogeneity_type", heterogeneity_type,
-        "--num_clients", num_clients,
-        "--num_samples_by_label", num_samples_by_label,
-        "--num_clusters", num_clusters,
-        "--centralized_epochs", centralized_epochs,
-        "--federated_rounds", federated_rounds,
-        "--seed", seed]
+        # Print the command to check it before running (optional)
+        print(f"Running command: {' '.join(command)}")
 
-    # Run the command
-    subprocess.run(command)
+        # Run the command
+        subprocess.run(command)
diff --git a/src/utils_data.py b/src/utils_data.py
index db1d33e18bc3ee207b5f7c1b1957a5b3306d2a0c..1d67a068b5c611320541a6c303dc2f0444759724 100644
--- a/src/utils_data.py
+++ b/src/utils_data.py
@@ -183,7 +183,7 @@ class AddRandomJitter(object):
                                            saturation = self.saturation, hue = self.hue)
         return transform(tensor)
 
-class CifarDataset(Dataset):
+class CustomDataset(Dataset):
 
     def __init__(self, data, labels, transform=None):
         # Ensure data is in (N, H, W, C) format
@@ -257,9 +257,9 @@ def data_preparation(client: Client, row_exp: dict) -> None:
 
 
     # Create datasets with transformations
-    train_dataset = CifarDataset(x_train, y_train, transform=train_transform)
-    val_dataset = CifarDataset(x_val, y_val, transform=test_val_transform)
-    test_dataset = CifarDataset(x_test, y_test, transform=test_val_transform)
+    train_dataset = CustomDataset(x_train, y_train, transform=train_transform)
+    val_dataset = CustomDataset(x_val, y_val, transform=test_val_transform)
+    test_dataset = CustomDataset(x_test, y_test, transform=test_val_transform)
 
     # Create DataLoaders
     train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
@@ -616,12 +616,14 @@ def swap_labels(labels : list, client : Client, heterogeneity_class : int) -> Cl
     return client
 
 
-def centralize_data(list_clients: list) -> Tuple[DataLoader, DataLoader]:
+def centralize_data(list_clients: list, row_exp: dict) -> Tuple[DataLoader, DataLoader]:
     """Centralize data
     of the federated learning setup for central model comparison
 
     Arguments:
         list_clients : The list of Client Objects
+        row_exp : The current experiment's global parameters
+
     Returns:
         Train and test torch DataLoaders with data of all Clients
     """
@@ -630,18 +632,26 @@ def centralize_data(list_clients: list) -> Tuple[DataLoader, DataLoader]:
     import torch
     from torch.utils.data import DataLoader,TensorDataset
     import numpy as np
-
-    train_transform = transforms.Compose([
+
+    if row_exp['dataset'] == 'cifar10':
+        train_transform = transforms.Compose([
         transforms.RandomHorizontalFlip(),
         transforms.RandomRotation(20), # Normalize if needed
         transforms.RandomCrop(32, padding=4),
         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+        ])
+        # Transform for validation and test data (no augmentation, just normalization)
+        test_val_transform = transforms.Compose([
+            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), ])
+    else :
+        train_transform = transforms.Compose([
+            transforms.Normalize((0.5,), (0.5,)), # Normalize if needed
     ])
-
-    # Transform for validation and test data (no augmentation, just normalization)
-    test_val_transform = transforms.Compose([
-        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)), # Normalize if needed
-    ])
+        # Transform for validation and test data (no augmentation, just normalization)
+        test_val_transform = transforms.Compose([
+            transforms.Normalize((0.5,), (0.5,)), # Normalize if needed
+        ])
+
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
@@ -658,9 +668,9 @@ def centralize_data(list_clients: list) -> Tuple[DataLoader, DataLoader]:
     y_test = np.concatenate([list_clients[id].train_test['y_test'] for id in range(len(list_clients))], axis=0)
 
     # Create Custom Datasets
-    train_dataset = CifarDataset(x_train, y_train, transform=train_transform)
-    val_dataset = CifarDataset(x_val, y_val, transform=test_val_transform)
-    test_dataset = CifarDataset(x_test, y_test, transform=test_val_transform)
+    train_dataset = CustomDataset(x_train, y_train, transform=train_transform)
+    val_dataset = CustomDataset(x_val, y_val, transform=test_val_transform)
+    test_dataset = CustomDataset(x_test, y_test, transform=test_val_transform)
 
     # Create DataLoaders
     train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
@@ -672,6 +682,7 @@ def centralize_data(list_clients: list) -> Tuple[DataLoader, DataLoader]:
 
 
 
+
 def unbalancing(client : Client ,labels_list : list ,ratio_list: list) -> Client :
     """ Downsample the dataset of a client with each elements of the labels_list
     will be downsampled by the corresponding ration of ratio_list
diff --git a/src/utils_training.py b/src/utils_training.py
index d1cc6d03d6e94316f29a4538d7e2bbc6cfda779f..8c77d04af9eb0a4518b9d6a372b94d4e90dc21a3 100644
--- a/src/utils_training.py
+++ b/src/utils_training.py
@@ -115,7 +115,7 @@ def run_benchmark(main_model : nn.Module, list_clients : list, row_exp : dict) -
     if row_exp['exp_type'] == 'pers-centralized':
         for heterogeneity_class in list_heterogeneities:
             list_clients_filtered = [client for client in list_clients if client.heterogeneity_class == heterogeneity_class]
-            train_loader, val_loader, test_loader = centralize_data(list_clients_filtered)
+            train_loader, val_loader, test_loader = centralize_data(list_clients_filtered,row_exp)
             model_trained, _ = train_central(curr_model, train_loader, val_loader, row_exp)
 
             global_acc = test_model(model_trained, test_loader)
@@ -129,7 +129,7 @@ def run_benchmark(main_model : nn.Module, list_clients : list, row_exp : dict) -
         model_server = copy.deepcopy(curr_model)
         model_trained = train_federated(model_server, list_clients, row_exp, use_cluster_models = False)
 
-        _, _,test_loader = centralize_data(list_clients)
+        _, _,test_loader = centralize_data(list_clients,row_exp)
         global_acc = test_model(model_trained.model, test_loader)
 
         for client in list_clients :
@@ -271,6 +271,8 @@ def test_model(model: nn.Module, test_loader: DataLoader) -> float:
 
         for batch in test_loader:
             inputs, labels = [item.to(device) for item in batch]
+
+            labels = labels.long()
             outputs = model(inputs)
 
             # Compute the loss