diff --git a/main.py b/main.py index 8e3543d92ad5f879fc91343909d0567ad05c2b21..0788ca8453f60bd4d2476f0d65d70755beb227b6 100755 --- a/main.py +++ b/main.py @@ -47,6 +47,11 @@ if __name__ == "__main__": help="Automatically split dataset into subsystems and treat them once at a time.", action="store_true" ) + parser.add_argument( + "-optim", + help="Optimize the different hyper parameters according to what's written in the algorithm_params.json file", + action="store_true" + ) parser.add_argument( "-d", "--docker", help="Use Docker containers directly to run the algorithm. Allow to run algorithms without cloning repo", @@ -151,13 +156,13 @@ if __name__ == "__main__": algo_params = json.load(f) with open(f"{INPUT_DIR}/{config_name}/time.csv", "a", encoding="utf-8") as f: - f.write("Algorithm,dataset,duration\n") + f.write("Algorithm,Dataset,Step,Duration\n") for algo in args.algorithms: params = algo_params[algo] train = params.pop("training") trainer = BaseTrainer(f"{INPUT_DIR}/{config_name}", algo, train, **params) - trainer.start() + trainer.start(optim=args.optim) trainers.append(trainer) # ================================================================================================================= diff --git a/trainers/base.py b/trainers/base.py index cdaec80df43d9cdb53717f060e1f74018b2788dc..a6adeafc211ae73fb034704f816008474465bf90 100644 --- a/trainers/base.py +++ b/trainers/base.py @@ -14,21 +14,6 @@ from sklearn.metrics import roc_auc_score#, f1_score from .correlations import CORRELATION_CLASSES -def __vote_for_score(scores, length): - """ - Compute the score for each point of the dataset instead of a per window basis. 
- """ - scores = np.nan_to_num(scores) - results = np.zeros(length) - w_size = length - len(scores) + 1 - - for idx in range(length): - start = idx - w_size if idx - w_size >= 0 else 0 - end = idx + w_size if idx + w_size < length else length - - results[idx] = np.mean(scores[start:end]) - - return results class BaseTrainer(): @@ -46,7 +31,7 @@ class BaseTrainer(): self.train = train self.pwd = "" - def start(self): + def start(self, optim=False): """ This method orchestrates the optimization, training and computing of the results for the current algorithm @@ -61,13 +46,19 @@ class BaseTrainer(): path = f"{self.pwd}/{self.data_path}" os.makedirs(f"{path}/results_{self.algorithm}", exist_ok=True) - optimize_time = self.optimize("dataset", path) + if optim: + optimize_time = self.optimize("dataset", path) + with open(f"{path}/time.csv", "a", encoding="utf-8") as f: + f.write(f"{self.algorithm},dataset,optimize,{optimize_time}\n") files = __exec(f"find -L {self.data_path} -regex '^.*dataset[-_0-9]*\(_auto_split\)*.csv'") for file in files: file_name = file.split("/")[-1][:-4] train_time, run_time = self.run(file_name, path) + with open(f"{path}/time.csv", "a", encoding="utf-8") as f: + f.write(f"{self.algorithm},{file_name},train,{train_time}") + f.write(f"{self.algorithm},{file_name},run,{run_time}") def optimize(self, file, path): """ @@ -76,6 +67,22 @@ class BaseTrainer(): It uses Bayesian Optimization to search within the space defined in the file `algorithm_params.json` present at the root of the project. """ + def __vote_for_score(scores, length): + """ + Compute the score for each point of the dataset instead of a per window basis. 
+ """ + scores = np.nan_to_num(scores) + results = np.zeros(length) + w_size = length - len(scores) + 1 + + for idx in range(length): + start = idx - w_size if idx - w_size >= 0 else 0 + end = idx + w_size if idx + w_size < length else length + + results[idx] = np.mean(scores[start:end]) + + return results + def __exec(cmd) -> str: """ Execute a shell command and process its output as expected.