Skip to content
Snippets Groups Projects
Commit 5723fb61 authored by Pierre LOTTE's avatar Pierre LOTTE
Browse files

Add log correlation

parent 674dea92
No related branches found
No related tags found
No related merge requests found
......@@ -10,6 +10,7 @@ from .frequency import FrequencyAnomaly
# Registry mapping each dimension-kind identifier to the anomaly classes
# that may be injected into dimensions of that kind. Insertion order is
# preserved (CORRELATION kinds first, then the oscillating kinds).
ANOMALIES = {
    kind: [NoiseAnomaly, CorrelationAnomaly]
    for kind in ("CORRELATION", "EXP_CORRELATION", "LOG_CORRELATION")
}
ANOMALIES["VFOSC"] = [NoiseAnomaly, VFAnomaly]
ANOMALIES["OSC"] = [NoiseAnomaly, FrequencyAnomaly]
......@@ -45,17 +45,22 @@ if __name__ == "__main__":
help="Length of the output",
dest="length"
)
parser.add_argument(
"-s",
type=int,
required=True,
help="Number of subsets of variables",
dest="subsets"
)
# Fetch arguments from CLI
args = parser.parse_args()
# Generate anomalies
generator = ConfigGenerator(args.length).generate(args.nb_dim)
generator = ConfigGenerator(args.length).generate(args.nb_dim, args.subsets)
for c in args.contamination:
gen_copy = deepcopy(generator)
config = gen_copy.add_anomalies(contamination=c).get_config()
with open(f"{args.output}/gc-d_{args.nb_dim}-l_{args.length}-c_{int(c)}.json", "w", encoding="utf-8") as f:
f.write(json.dumps(config))
# .add_anomalies(contamination=args.contamination)
"""
This module provides the helper classes to generate each kind of dimensions.
"""
from .correlation import ExpCorrelationDimension, LinearCorrelationDimension
from .correlation import ExpCorrelationDimension, LinearCorrelationDimension, LogCorrelationDimension
from .oscillating import OscillatingDimension
from .varying_frequency import VFOscillatingDimension
NB_CORR = 2
NB_CORR = 3
DIMENSION_CLASSES = [
LinearCorrelationDimension,
LogCorrelationDimension,
ExpCorrelationDimension,
OscillatingDimension,
VFOscillatingDimension
VFOscillatingDimension,
]
......@@ -30,14 +30,14 @@ class LinearCorrelationDimension(BaseDimension):
"anomalies": []
}
class ExpCorrelationDimension(BaseDimension):
def generate(self, idx:int) -> Dict:
dimension = np.random.choice(range(idx))
# Choose start and end point of exponential function to be used
limits = np.random.uniform(low=-5.0, high=2.5, size=2)
limits.sort()
start, end = limits
start = np.random.uniform(low=-5.0, high=-2.5)
end = np.random.uniform(low=0.0, high=1.5)
# Choose lag
lag = np.random.randint(low=0, high=20)
......@@ -57,3 +57,31 @@ class ExpCorrelationDimension(BaseDimension):
},
"anomalies": []
}
class LogCorrelationDimension(BaseDimension):
    """Configuration generator for a dimension log-correlated to another one.

    ``generate`` returns a JSON-serialisable dict describing the correlation:
    which dimension to follow, the curve interval, a sign, a lag, and the
    Gaussian noise parameters.
    """

    def generate(self, idx: int) -> Dict:
        # Pick which previously generated dimension this one correlates with
        # (any index strictly below our own).
        target_dim = np.random.choice(range(idx))

        # Sample the interval over which the correlation curve is evaluated.
        # NOTE(review): same sampling ranges as the exponential variant —
        # presumably interpreted through a log mapping by the consumer.
        curve_start = np.random.uniform(low=-5.0, high=-2.5)
        curve_end = np.random.uniform(low=0.0, high=1.5)

        # Random shift (in samples) between this dimension and its target.
        shift = np.random.randint(low=0, high=20)

        # Randomly flip the direction of the correlation.
        orientation = int(np.random.choice([-1, 1]))

        return {
            "kind": "LOG_CORRELATION",
            "dimension": int(target_dim),
            "equation" : {
                "sign": orientation,
                "start": round(curve_start, 4),
                "end": round(curve_end, 4),
                "lag": int(shift),
            },
            "noise": {"mean": 0.0, "std": 0.1},
            "anomalies": [],
        }
......@@ -39,22 +39,16 @@ class ConfigGenerator:
"""
return self.config
def generate(self, nb_dim=20):
def generate(self, nb_dim=20, nb_subsets=2):
"""
This method is in charge of the generation of the configuration of a single dimension.
"""
# Generate dimensions
for i in range(nb_dim):
if i == 0:
if i < nb_subsets:
dim_type = np.random.choice(DIMENSION_CLASSES[NB_CORR:])
else:
# We make it progressively more likely to pick a correlated dimension
# The value of 0.8 is pretty much arbitrary. Change it as you like
# to change the size of the subsystems
p = np.ones(len(DIMENSION_CLASSES))
p[:NB_CORR] *= 1 + i/(i*0.9)
dim_type = np.random.choice(DIMENSION_CLASSES, p=self.__softmax(p))
dim_type = np.random.choice(DIMENSION_CLASSES[:NB_CORR])
self.config["dimensions"].append(dim_type(nb_dim).generate(i))
......@@ -80,13 +74,11 @@ class ConfigGenerator:
others. This choice is not motivated by any statistical or mathematical choice
and could probably be improved.
"""
print(contamination)
nb_points = int((self.config["length"]/100)*contamination)
nb_sub = len(self.config["subsystems"])
anomalies_per_sub = [round(np.random.uniform(low=0, high=10), 4) for _ in range(nb_sub)]
anomalies_per_sub = (self.__softmax(anomalies_per_sub) * nb_points).astype(int)
print(anomalies_per_sub)
for idx, sub in enumerate(self.config["subsystems"]):
nb_dim_erroneous = np.random.randint(1, int(len(sub)/2)) if len(sub) > 3 else 1
......
......@@ -7,6 +7,7 @@ from .affine import AffineDimension
from .exp_correlation import ExponentialCorrelationDimension
from .inertia import InertiaDimension
from .linear_correlation import LinearCorrelationDimension
from .log_correlation import LogCorrelationDimension
from .oscillating import OscillatingDimension
from .varying_frequency import VFOscillatingDimension
......@@ -15,6 +16,7 @@ DIMENSION_CLASSES = {
"OSC": OscillatingDimension,
"CORRELATION": LinearCorrelationDimension,
"EXP_CORRELATION": ExponentialCorrelationDimension,
"LOG_CORRELATION": LogCorrelationDimension,
"INERTIA": InertiaDimension,
"VFOSC": VFOscillatingDimension
"VFOSC": VFOscillatingDimension,
}
......@@ -13,7 +13,7 @@ class ExponentialCorrelationDimension(BaseDimension):
"""
def generate(self) -> np.array:
# Compute useful data
exp_diffs = np.exp(np.linspace(self.terms["start"], self.terms["end"], self.data.shape[1]//5))
exp_diffs = np.exp(np.linspace(self.terms["start"], self.terms["end"], self.data.shape[1]//5)) /10
# Compute testing data
# Find the dimension to watch for
......
......@@ -143,6 +143,13 @@ if __name__ == "__main__":
df_train_s.to_csv(f"{OUTPUT_DIR}/{config_name}/dataset_{idx}_train.csv", index_label="Timestamp")
pd.DataFrame(data=lab).to_csv(f"{OUTPUT_DIR}/{config_name}/dataset_{idx}_labels.csv", index=False)
with open(f"{OUTPUT_DIR}/{config_name}/subsystems.txt", "w", encoding="utf-8") as f:
clusters = np.zeros(dataset.shape[0])
for idx, cluster in enumerate(subsystems):
for member in cluster:
clusters[member] = idx
f.write(json.dumps(clusters.tolist()))
# =================================================================================================================
# Split data
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment