Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
PARADISE
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
SIG
Theses
Pierre Lotte
PARADISE
Commits
a806e498
Commit
a806e498
authored
8 months ago
by
Pierre LOTTE
Browse files
Options
Downloads
Patches
Plain Diff
Add F1 measure with threshold selection from ROC curve
parent
e656158b
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
results/base.py
+1
-1
1 addition, 1 deletion
results/base.py
results/roc.py
+65
-62
65 additions, 62 deletions
results/roc.py
with
66 additions
and
63 deletions
results/base.py
+
1
−
1
View file @
a806e498
...
...
@@ -11,7 +11,7 @@ class BaseResults():
self
.
path
=
path
self
.
algos
=
algos
self
.
configs
=
config_names
self
.
result
=
{}
self
.
result
=
{
"
roc
"
:
{},
"
f1
"
:
{}
}
def
compute
(
self
,
auto_split
=
False
):
"""
...
...
This diff is collapsed.
Click to expand it.
results/roc.py
+
65
−
62
View file @
a806e498
...
...
@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
import
numpy
as
np
import
pandas
as
pd
from
sklearn.metrics
import
roc_auc_score
,
roc_curve
from
sklearn.metrics
import
roc_auc_score
,
roc_curve
,
f1_score
from
sklearn.preprocessing
import
Normalizer
from
.base
import
BaseResults
...
...
@@ -24,7 +24,6 @@ class ROCResults(BaseResults):
"""
return
subprocess
.
check_output
(
cmd
,
shell
=
True
).
decode
(
"
utf-8
"
).
split
()
norm
=
Normalizer
()
for
config
in
self
.
configs
:
config_name
=
config
.
split
(
"
/
"
)[
-
1
][:
-
5
]
if
"
.
"
in
config
else
config
.
split
(
"
/
"
)[
-
1
]
...
...
@@ -32,79 +31,32 @@ class ROCResults(BaseResults):
# Compute the score for the full_dataset
for
algo
in
self
.
algos
:
y_pred
=
np
.
loadtxt
(
f
"
{
self
.
path
}
/
{
config_name
}
/results_
{
algo
}
/anomaly_scores_dataset_
{
algo
}
.ts
"
)
y_pred
=
np
.
nan_to_num
(
y_pred
)
y_pred
_path
=
f
"
{
self
.
path
}
/
{
config_name
}
/results_
{
algo
}
/anomaly_scores_dataset_
{
algo
}
.ts
"
roc
,
f1
=
self
.
__compute_score
(
labels
,
y_pred
_path
)
if
len
(
y_pred
)
!=
len
(
labels
):
y_pred
=
self
.
__vote_for_score
(
y_pred
,
len
(
labels
))
score
=
roc_auc_score
(
labels
,
y_pred
)
fpr
,
tpr
,
_
=
roc_curve
(
labels
,
y_pred
)
if
algo
in
self
.
result
:
self
.
result
[
algo
][
config_name
]
=
{
"
classic
"
:
round
(
score
,
4
)}
if
algo
in
self
.
result
[
"
roc
"
]:
self
.
result
[
"
roc
"
][
algo
][
config_name
]
=
{
"
classic
"
:
round
(
roc
,
4
)}
self
.
result
[
"
f1
"
][
algo
][
config_name
]
=
{
"
classic
"
:
round
(
f1
,
4
)}
else
:
self
.
result
[
algo
]
=
{
config_name
:
{
"
classic
"
:
round
(
score
,
4
)}}
plt
.
rcParams
[
"
figure.figsize
"
]
=
(
10
,
10
)
plt
.
plot
(
fpr
,
tpr
)
plt
.
plot
(
np
.
linspace
(
0
,
1
,
10
),
np
.
linspace
(
0
,
1
,
10
),
linestyle
=
"
--
"
,
label
=
"
ROC=0.5
"
)
plt
.
xlabel
(
"
False Positive Rate
"
)
plt
.
ylabel
(
"
True Positive Rate
"
)
plt
.
legend
()
plt
.
savefig
(
f
"
{
self
.
path
}
/
{
config_name
}
/roc_auc_
{
algo
}
.png
"
)
plt
.
clf
()
plt
.
rcParams
[
"
figure.figsize
"
]
=
(
20
,
10
)
self
.
result
[
"
roc
"
][
algo
]
=
{
config_name
:
{
"
classic
"
:
round
(
roc
,
4
)}}
self
.
result
[
"
f1
"
][
algo
]
=
{
config_name
:
{
"
classic
"
:
round
(
f1
,
4
)}}
# Compute results for the automatically split dataset
if
auto_split
:
files
=
__exec
(
f
"
find -L
{
self
.
path
}
/
{
config_name
}
/results_
{
algo
}
-regex
'
^.*dataset[_0-9]+_auto_split_
{
algo
}
.ts
'"
)
result
=
np
.
zeros
(
len
(
labels
))
for
file
in
files
:
y_pred
=
np
.
loadtxt
(
file
)
y_pred
=
np
.
nan_to_num
(
y_pred
)
if
len
(
y_pred
)
!=
len
(
labels
):
y_pred
=
self
.
__vote_for_score
(
y_pred
,
len
(
labels
))
y_pred
=
norm
.
fit_transform
(
y_pred
.
reshape
(
1
,
-
1
)).
reshape
(
-
1
)
result
=
np
.
maximum
(
result
,
y_pred
)
score
=
roc_auc_score
(
labels
,
result
)
self
.
result
[
algo
][
f
"
{
config_name
}
"
][
"
auto_split
"
]
=
round
(
score
,
4
)
roc
,
f1
=
self
.
__compute_score
(
labels
,
files
,
local
=
True
)
self
.
result
[
"
roc
"
][
algo
][
f
"
{
config_name
}
"
][
"
auto_split
"
]
=
round
(
roc
,
4
)
self
.
result
[
"
f1
"
][
algo
][
f
"
{
config_name
}
"
][
"
auto_split
"
]
=
round
(
f1
,
4
)
# Compute results for the split dataset
files
=
__exec
(
f
"
find -L
{
self
.
path
}
/
{
config_name
}
/results_
{
algo
}
-regex
'
^.*dataset[_0-9]+_
{
algo
}
.ts
'"
)
result
=
np
.
zeros
(
len
(
labels
))
for
file
in
files
:
y_pred
=
np
.
loadtxt
(
file
)
y_pred
=
np
.
nan_to_num
(
y_pred
)
if
len
(
y_pred
!=
len
(
labels
)):
y_pred
=
self
.
__vote_for_score
(
y_pred
,
len
(
labels
))
y_pred
=
norm
.
fit_transform
(
y_pred
.
reshape
(
1
,
-
1
)).
reshape
(
-
1
)
result
=
np
.
maximum
(
result
,
y_pred
)
score
=
roc_auc_score
(
labels
,
result
)
self
.
result
[
algo
][
f
"
{
config_name
}
"
][
"
split
"
]
=
round
(
score
,
4
)
roc
,
f1
=
self
.
__compute_score
(
labels
,
files
,
local
=
True
)
self
.
result
[
"
roc
"
][
algo
][
f
"
{
config_name
}
"
][
"
split
"
]
=
round
(
roc
,
4
)
self
.
result
[
"
f1
"
][
algo
][
f
"
{
config_name
}
"
][
"
split
"
]
=
round
(
f1
,
4
)
print
(
json
.
dumps
(
self
.
result
))
# for algo, algo_res in self.result.items():
# l_classic = []
# l_auto_split = []
# l_split = []
# for details in algo_res.values():
# l_classic.append(details["classic"])
# l_auto_split.append(details["auto_split"])
# l_split.append(details["split"])
# print(f"{algo}:")
# print(f"\tClassic: {np.mean(l_classic)} ({np.std(l_classic)})")
# print(f"\tAuto split: {np.mean(l_auto_split)} ({np.std(l_auto_split)})")
# print(f"\tSplit: {np.mean(l_split)} ({np.std(l_split)})")
def
__vote_for_score
(
self
,
scores
,
length
):
"""
Compute the score for each point of the dataset instead of a per window basis.
...
...
@@ -120,3 +72,54 @@ class ROCResults(BaseResults):
results
[
idx
]
=
np
.
mean
(
scores
[
start
:
end
])
return
results
def __compute_score(self, labels, y_pred_path, local=False):
    """
    Compute the ROC AUC and the F1 score of the given predictions.

    The F1 score is obtained by binarizing the anomaly scores with the threshold of
    the ROC curve point that lies closest to the ideal corner (FPR = 0, TPR = 1).

    Parameters
    ----------
    labels
        Ground-truth labels, one per point of the dataset.
    y_pred_path
        When `local` is False, the path of a single anomaly score file.
        When `local` is True, a collection of paths to local score files that are
        aggregated before the metrics are computed.
    local
        Whether `y_pred_path` is a collection of local score files.

    Returns
    -------
    tuple
        `(roc, f1)`. `(0, 0)` is returned when `local` is True and no score file
        was given.
    """
    n_points = len(labels)

    # If local is set to true, it means that we have a list of paths for local scores.
    # We must first retrieve all the scores and aggregate them.
    if local:
        if len(y_pred_path) == 0:
            return 0, 0

        norm = Normalizer()
        result = np.zeros(n_points)

        for path in y_pred_path:
            y_pred = np.nan_to_num(np.loadtxt(path))

            # Scores that are not already expressed per point must be converted.
            # The length test used to read `len(y_pred != len(labels))`, which is
            # the length of a boolean array and therefore always truthy.
            if len(y_pred) != n_points:
                y_pred = self.__vote_for_score(y_pred, n_points)

            # Each file is normalized on its own so that the element-wise maximum
            # below compares scores on a common scale.
            y_pred = norm.fit_transform(y_pred.reshape(1, -1)).reshape(-1)
            result = np.maximum(result, y_pred)
    # Otherwise, we simply have one score file, we must read it and compute the score
    # for each instant.
    else:
        result = np.nan_to_num(np.loadtxt(y_pred_path))

        # Convert the NaN-free scores: passing the raw scores here would
        # reintroduce the NaN values that were just removed.
        if len(result) != n_points:
            result = self.__vote_for_score(result, n_points)

    # Once the correct anomaly scores have been computed, we can compute the metrics
    roc = roc_auc_score(labels, result)
    fpr, tpr, thresh = roc_curve(labels, result)

    # Select the threshold of the ROC point closest to the (0, 1) corner. `argmin`
    # returns the first minimum, like a strict `<` comparison in a loop would.
    closest_thresh = thresh[np.argmin(np.hypot(fpr, tpr - 1))]

    # A ROC point counts a sample as positive when its score is greater than or
    # EQUAL to the threshold, so the comparison has to be inclusive to reproduce
    # the selected operating point.
    binary_labels = (result >= closest_thresh).astype(int)
    f1 = f1_score(labels, binary_labels)

    return roc, f1
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment