Skip to content
Snippets Groups Projects
Commit 727a44f1 authored by Pierre LOTTE's avatar Pierre LOTTE
Browse files

Fix optimizer, add time data and add optim param

parent 93168ea6
Branches
Tags
No related merge requests found
......@@ -47,6 +47,11 @@ if __name__ == "__main__":
help="Automatically split dataset into subsystems and treat them once at a time.",
action="store_true"
)
parser.add_argument(
"-optim",
help="Optimize the different hyper parameters according to what's written in the algorithm_params.json file",
action="store_true"
)
parser.add_argument(
"-d", "--docker",
help="Use Docker containers directly to run the algorithm. Allow to run algorithms without cloning repo",
......@@ -151,13 +156,13 @@ if __name__ == "__main__":
algo_params = json.load(f)
with open(f"{INPUT_DIR}/{config_name}/time.csv", "a", encoding="utf-8") as f:
f.write("Algorithm,dataset,duration\n")
f.write("Algorithm,Dataset,Step,Duration\n")
for algo in args.algorithms:
params = algo_params[algo]
train = params.pop("training")
trainer = BaseTrainer(f"{INPUT_DIR}/{config_name}", algo, train, **params)
trainer.start()
trainer.start(optim=args.optim)
trainers.append(trainer)
# =================================================================================================================
......
......@@ -14,21 +14,6 @@ from sklearn.metrics import roc_auc_score#, f1_score
from .correlations import CORRELATION_CLASSES
def __vote_for_score(scores, length):
    """
    Convert per-window anomaly scores into one score per dataset point.

    Each of the ``length`` points is scored with the mean of the window
    scores whose indices lie within one window-size of it; NaN window
    scores are treated as 0 beforehand.

    :param scores: array-like of per-window scores (len = length - w + 1)
    :param length: total number of points in the dataset
    :return: numpy array of shape (length,) with per-point scores
    """
    clean = np.nan_to_num(scores)
    # Window size recovered from how many windows a series of `length`
    # points produces: n_windows = length - w_size + 1.
    window = length - len(clean) + 1
    per_point = np.zeros(length)
    for pos in range(length):
        lo = max(pos - window, 0)
        hi = min(pos + window, length)
        per_point[pos] = np.mean(clean[lo:hi])
    return per_point
class BaseTrainer():
......@@ -46,7 +31,7 @@ class BaseTrainer():
self.train = train
self.pwd = ""
def start(self):
def start(self, optim=False):
"""
This method orchestrates the optimization, training and computing of the results for the
current algorithm
......@@ -61,13 +46,19 @@ class BaseTrainer():
path = f"{self.pwd}/{self.data_path}"
os.makedirs(f"{path}/results_{self.algorithm}", exist_ok=True)
optimize_time = self.optimize("dataset", path)
if optim:
optimize_time = self.optimize("dataset", path)
with open(f"{path}/time.csv", "a", encoding="utf-8") as f:
f.write(f"{self.algorithm},dataset,optimize,{optimize_time}\n")
files = __exec(f"find -L {self.data_path} -regex '^.*dataset[-_0-9]*\(_auto_split\)*.csv'")
for file in files:
file_name = file.split("/")[-1][:-4]
train_time, run_time = self.run(file_name, path)
with open(f"{path}/time.csv", "a", encoding="utf-8") as f:
f.write(f"{self.algorithm},{file_name},train,{train_time}")
f.write(f"{self.algorithm},{file_name},run,{run_time}")
def optimize(self, file, path):
"""
......@@ -76,6 +67,22 @@ class BaseTrainer():
It uses Bayesian Optimization to search within the space defined in the file
`algorithm_params.json` present at the root of the project.
"""
def __vote_for_score(scores, length):
"""
Compute the score for each point of the dataset instead of a per window basis.
"""
scores = np.nan_to_num(scores)
results = np.zeros(length)
w_size = length - len(scores) + 1
for idx in range(length):
start = idx - w_size if idx - w_size >= 0 else 0
end = idx + w_size if idx + w_size < length else length
results[idx] = np.mean(scores[start:end])
return results
def __exec(cmd) -> str:
"""
Execute a shell command and process its output as expected.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment