From e656158ba7683c54d02f0e76306296c7bafbaee3 Mon Sep 17 00:00:00 2001
From: Pierre LOTTE <pierrelotte.dev@gmail.com>
Date: Mon, 23 Sep 2024 15:34:57 +0200
Subject: [PATCH] Add possibility to generate same config with different
 contamination levels

---
 config_maker/config_maker.py | 14 ++++++++++----
 config_maker/generator.py    |  4 +++-
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/config_maker/config_maker.py b/config_maker/config_maker.py
index eb09fe1..db98196 100755
--- a/config_maker/config_maker.py
+++ b/config_maker/config_maker.py
@@ -4,6 +4,7 @@ This scripts automatically generates configurations usable by the TSG software.
 """
 
 import json
+from copy import deepcopy
 
 from argparse import ArgumentParser
 
@@ -26,6 +27,7 @@ if __name__ == "__main__":
         "-c",
         type=float,
         required=True,
+        nargs="+",
         help="Contamination percentage to use",
         dest="contamination"
     )
@@ -48,8 +50,12 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     # Generate anomalies
-    generator = ConfigGenerator(args.length).generate(args.nb_dim).add_anomalies(contamination=args.contamination)
-    config = generator.get_config()
+    generator = ConfigGenerator(args.length).generate(args.nb_dim)
 
-    with open(args.output, "w", encoding="utf-8") as f:
-        f.write(json.dumps(config))
+    # Emit one config per contamination level, each built from a fresh deep copy of the base.
+    for level in args.contamination:
+        contaminated = deepcopy(generator).add_anomalies(contamination=level)
+        # ":g" keeps fractional levels distinct (0.5 -> "0.5", 5.0 -> "5") instead of truncating.
+        out_path = f"{args.output}/gc_d{args.nb_dim}_l{args.length}_c{level:g}.json"
+        with open(out_path, "w", encoding="utf-8") as f:
+            f.write(json.dumps(contaminated.get_config()))
diff --git a/config_maker/generator.py b/config_maker/generator.py
index e7c58e7..9b71fb0 100644
--- a/config_maker/generator.py
+++ b/config_maker/generator.py
@@ -80,11 +80,13 @@ class ConfigGenerator:
         others. This choice is not motivated by any statistical or mathematical choice
         and could probably be improved.
         """
+        # contamination is a percentage of self.config["length"]; convert it to a point count.
         nb_points = int((self.config["length"]/100)*contamination)
         nb_sub = len(self.config["subsystems"])
 
-        anomalies_per_sub = [round(np.random.uniform(low=0.25, high=0.75), 4) for _ in range(nb_sub)]
+        anomalies_per_sub = [round(np.random.uniform(low=0, high=10), 4) for _ in range(nb_sub)]
         anomalies_per_sub = (self.__softmax(anomalies_per_sub) * nb_points).astype(int)
+        # NOTE: astype(int) truncates, so each subsystem's share is rounded down to whole points.
 
         for idx, sub in enumerate(self.config["subsystems"]):
             nb_dim_erroneous = np.random.randint(1, int(len(sub)/2)) if len(sub) > 3 else 1
-- 
GitLab