Skip to content
Snippets Groups Projects
Commit 43a3caa3 authored by Pierre LOTTE's avatar Pierre LOTTE
Browse files

Add option to merge (or keep separate) the correlation matrices during splitting

parent a60e4fc7
Branches
No related tags found
No related merge requests found
...@@ -63,6 +63,11 @@ if __name__ == "__main__": ...@@ -63,6 +63,11 @@ if __name__ == "__main__":
help="Use Docker containers directly to run the algorithm. Allow to run algorithms without cloning repo", help="Use Docker containers directly to run the algorithm. Allow to run algorithms without cloning repo",
action="store_true" action="store_true"
) )
parser.add_argument(
"--merge",
help="Whether the method should merge the correlation matrices or not.",
action="store_true"
)
# Load args # Load args
args = parser.parse_args() args = parser.parse_args()
...@@ -157,7 +162,7 @@ if __name__ == "__main__": ...@@ -157,7 +162,7 @@ if __name__ == "__main__":
if args.split and args.task in ["train", "all"]: if args.split and args.task in ["train", "all"]:
splitter = BaseSplitter(f"{INPUT_DIR}/{config_name}") splitter = BaseSplitter(f"{INPUT_DIR}/{config_name}")
splitter.split_data(method=args.method) splitter.split_data(method=args.method, merge=args.merge)
# ================================================================================================================= # =================================================================================================================
# Train algorithm # Train algorithm
......
...@@ -31,7 +31,7 @@ class BaseSplitter: ...@@ -31,7 +31,7 @@ class BaseSplitter:
self.output_path = f"{path}/splitting" self.output_path = f"{path}/splitting"
os.makedirs(f"{path}/splitting", exist_ok=True) os.makedirs(f"{path}/splitting", exist_ok=True)
def split_data(self, method="HDBSCAN"): def split_data(self, method="HDBSCAN", merge=False):
""" """
This method will be in charge of splitting data into subsystems. This method will be in charge of splitting data into subsystems.
""" """
...@@ -49,7 +49,7 @@ class BaseSplitter: ...@@ -49,7 +49,7 @@ class BaseSplitter:
# clusters from its coefficient # clusters from its coefficient
max_silhouette = 0 max_silhouette = 0
best_clusters = None best_clusters = None
x = self._compute_correlations(w_df) x = self._compute_correlations(w_df, merge=merge)
if "HDBSCAN" == method: if "HDBSCAN" == method:
model = HDBSCAN(min_cluster_size=2, allow_single_cluster=True, n_jobs=-1) model = HDBSCAN(min_cluster_size=2, allow_single_cluster=True, n_jobs=-1)
...@@ -83,7 +83,7 @@ class BaseSplitter: ...@@ -83,7 +83,7 @@ class BaseSplitter:
labels = np.bitwise_or.reduce(labels_df.drop(columns=drop).to_numpy(), axis=1, dtype=np.int32) labels = np.bitwise_or.reduce(labels_df.drop(columns=drop).to_numpy(), axis=1, dtype=np.int32)
pd.DataFrame(labels).to_csv(f"{self.data_path}/dataset_{i}_auto_split_labels.csv", index=False) pd.DataFrame(labels).to_csv(f"{self.data_path}/dataset_{i}_auto_split_labels.csv", index=False)
def _compute_correlations(self, data): def _compute_correlations(self, data, merge=False):
""" """
Compute the vector of correlation coefficients for each of the variable of the dataset. Compute the vector of correlation coefficients for each of the variable of the dataset.
""" """
...@@ -106,11 +106,14 @@ class BaseSplitter: ...@@ -106,11 +106,14 @@ class BaseSplitter:
x.append(np.abs(correlation_matrix)) x.append(np.abs(correlation_matrix))
x = np.array(x) if merge:
x = np.mean(x, axis=0) x = np.array(x)
x = np.mean(x, axis=0)
sns.heatmap(x, annot=True, cmap="coolwarm")\ sns.heatmap(x, annot=True, cmap="coolwarm")\
.get_figure()\ .get_figure()\
.savefig(f"{self.output_path}/dataset_final_matrix.png") .savefig(f"{self.output_path}/dataset_final_matrix.png")
else:
x = np.concatenate(x, axis=1)
return x return x
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment