Skip to content
Snippets Groups Projects
Commit 3e7da284 authored by Pierre LOTTE's avatar Pierre LOTTE
Browse files

Add lstm_ad

parent c218c0e6
No related branches found
No related tags found
No related merge requests found
# Base image: TimeEval's Python 3 + PyTorch runtime (provides the algorithm entrypoint wrapper).
FROM ghcr.io/timeeval/python3-torch:0.3.0
LABEL maintainer="phillip.wenig@hpi.de"
LABEL org.opencontainers.image.licenses=MIT
# Script the TimeEval base image executes as the algorithm's main entry point.
ENV ALGORITHM_MAIN="/app/algorithm.py"
# install algorithm dependencies
COPY requirements.txt /app/
RUN pip install -r /app/requirements.txt
# Copy the model package, manifest, and entry script into the image.
COPY lstm_ad /app/lstm_ad
COPY manifest.json /app/
COPY algorithm.py /app/
MIT License
Copyright (c) 2020-2022 Phillip Wenig and Sebastian Schmidl
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# LSTM-AD
|||
| :--- | :--- |
| Citekey | MalhotraEtAl2015Long |
| Source code | `own` |
| Learning type | semi-supervised |
| Input dimensionality | multivariate |
|||
## Dependencies
- python 3
- pytorch
## Notes
LSTM-AD outputs anomaly scores for windows.
The results require post-processing.
The scores for each point can be assigned by aggregating the anomaly scores for each window the point is included in.
You can use the following code snippet for the post-processing step in TimeEval (default parameters directly filled in from the source code):
<!--BEGIN:timeeval-post-->
```python
import numpy as np
from timeeval.utils.window import ReverseWindowing
# post-processing for LSTM-AD
def post_lstm_ad(scores: np.ndarray, args: dict) -> np.ndarray:
window_size = args.get("hyper_params", {}).get("window_size", 30)
prediction_window_size = args.get("hyper_params", {}).get("prediction_window_size", 1)
return ReverseWindowing(window_size=window_size + prediction_window_size).fit_transform(scores)
```
<!--END:timeeval-post-->
import argparse
from dataclasses import dataclass, asdict, field
import json
import numpy as np
import pandas as pd
import sys
from typing import List
from lstm_ad.model import LSTMAD
@dataclass
class CustomParameters:
    """Hyper-parameters for LSTM-AD; names and defaults mirror manifest.json."""
    lstm_layers: int = 2  # number of stacked LSTM layers
    split: float = 0.9  # train/validation split used for early stopping
    window_size: int = 30  # length of the input sliding window
    prediction_window_size: int = 1  # number of points predicted per window
    # Extra output dimensions; empty list presumably falls back to the model's
    # default — TODO confirm against lstm_ad.model.LSTMAD
    output_dims: List[int] = field(default_factory=lambda: [])
    batch_size: int = 32  # instances trained at the same time
    validation_batch_size: int = 128  # instances validated at the same time
    test_batch_size: int = 128  # instances scored at the same time during execute
    epochs: int = 50  # bigger for smaller datasets, smaller for bigger datasets
    early_stopping_delta: float = 0.05  # stop if 1 - (loss / last_loss) < delta for `patience` epochs
    early_stopping_patience: int = 10  # epochs tolerated below `delta` before stopping
    optimizer: str = "adam"  # not exposed, always use Adam!
    learning_rate: float = 1e-3  # learning rate for the Adam optimizer
    random_state: int = 42  # seed for Python/NumPy/PyTorch RNGs
class AlgorithmArgs(argparse.Namespace):
    """Typed view over the TimeEval JSON arguments passed as sys.argv[1]."""

    @property
    def ts(self) -> np.ndarray:
        """Raw time-series values: drops the first (index) and last (label) columns."""
        return self.df.iloc[:, 1:-1].values

    @property
    def df(self) -> pd.DataFrame:
        """Load the dataset from the path supplied in `dataInput`."""
        return pd.read_csv(self.dataInput)

    @staticmethod
    def from_sys_args() -> 'AlgorithmArgs':
        """Parse sys.argv[1] as JSON and build an AlgorithmArgs.

        Unknown custom-parameter keys are silently dropped so extra keys in
        the TimeEval config cannot crash `CustomParameters(**...)`.
        """
        args: dict = json.loads(sys.argv[1])
        # asdict() yields exactly the declared dataclass fields. The previous
        # dir()-based check also matched methods and dunders, so bogus keys
        # such as "__init__" would have passed the filter and crashed the
        # constructor below.
        custom_parameter_keys = asdict(CustomParameters()).keys()
        filtered_parameters = {
            key: value
            for key, value in args.get("customParameters", {}).items()
            if key in custom_parameter_keys
        }
        args["customParameters"] = CustomParameters(**filtered_parameters)
        return AlgorithmArgs(**args)
def train(args: AlgorithmArgs):
    """Fit an LSTM-AD model on the input series and persist it to `modelOutput`."""
    series = args.ts
    hyper_params = asdict(args.customParameters)
    model = LSTMAD(input_size=series.shape[1], **hyper_params)
    # fit() also receives the model path — presumably for checkpointing during
    # training — TODO confirm against lstm_ad.model.LSTMAD.fit
    model.fit(series, args.modelOutput)
    model.save(args.modelOutput)
def execute(args: AlgorithmArgs):
    """Score the input series with a previously trained model; write scores to `dataOutput`."""
    series = args.ts
    hyper_params = asdict(args.customParameters)
    model = LSTMAD.load(args.modelInput, input_size=series.shape[1], **hyper_params)
    scores = model.anomaly_detection(series)
    # One score per line; TimeEval reads this plain-text file back in.
    scores.tofile(args.dataOutput, sep="\n")
def set_random_state(config: AlgorithmArgs) -> None:
seed = config.customParameters.random_state
import random, torch
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if __name__ == "__main__":
    args = AlgorithmArgs.from_sys_args()
    set_random_state(args)
    # Dispatch on the executionType requested by TimeEval.
    actions = {"train": train, "execute": execute}
    action = actions.get(args.executionType)
    if action is None:
        raise ValueError(f"No executionType '{args.executionType}' available! Choose either 'train' or 'execute'.")
    action(args)
{
"title": "LSTM-AD",
"description": "Implementation of https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2015-56.pdf",
"inputDimensionality": "multivariate",
"version": "0.3.0",
"authors": "Pankaj Malhotra, Lovekesh Vig, Gautam Shroff, Puneet Agarwal",
"language": "Python",
"type": "Detector",
"mainFile": "algorithm.py",
"learningType": "semi-supervised",
"trainingStep": {
"parameters": [
{
"name": "lstm_layers",
"type": "int",
"defaultValue": 2,
"optional": "true",
"description": "Number of stacked LSTM layers"
},
{
"name": "split",
"type": "float",
"defaultValue": 0.9,
"optional": "true",
"description": "Train-validation split for early stopping"
},
{
"name": "window_size",
"type": "int",
"defaultValue": 30,
"optional": "true",
"description": ""
},
{
"name": "prediction_window_size",
"type": "int",
"defaultValue": 1,
"optional": "true",
"description": "Number of points predicted"
},
{
"name": "batch_size",
"type": "int",
"defaultValue": 32,
"optional": "true",
"description": "Number of instances trained at the same time"
},
{
"name": "validation_batch_size",
"type": "int",
"defaultValue": 128,
"optional": "true",
"description": "Number of instances used for validation at the same time"
},
{
"name": "epochs",
"type": "int",
"defaultValue": 50,
"optional": "true",
"description": "Number of training iterations over entire dataset"
},
{
"name": "early_stopping_delta",
"type": "float",
"defaultValue": 0.05,
"optional": "true",
"description": "If 1 - (loss / last_loss) is less than `delta` for `patience` epochs, stop"
},
{
"name": "early_stopping_patience",
"type": "int",
"defaultValue": 10,
"optional": "true",
"description": "If 1 - (loss / last_loss) is less than `delta` for `patience` epochs, stop"
},
{
"name": "learning_rate",
"type": "float",
"defaultValue": 0.001,
"optional": "true",
"description": "Learning rate for Adam optimizer"
},
{
"name": "random_state",
"type": "int",
"defaultValue": 42,
"optional": "true",
"description": "Seed for the random number generator"
}
],
"modelInput": "none"
},
"executionStep": {
"parameters": [
{
"name": "lstm_layers",
"type": "int",
"defaultValue": 2,
"optional": "true",
"description": "Number of stacked LSTM layers"
},
{
"name": "window_size",
"type": "int",
"defaultValue": 30,
"optional": "true",
"description": "Size of the sliding windows"
},
{
"name": "prediction_window_size",
"type": "int",
"defaultValue": 1,
"optional": "true",
"description": "Number of points predicted"
},
{
"name": "test_batch_size",
"type": "int",
"defaultValue": 128,
"optional": "true",
"description": "Number of instances used for testing at the same time"
},
{
"name": "random_state",
"type": "int",
"defaultValue": 42,
"optional": "true",
"description": "Seed for the random number generator"
}
],
"modelInput": "required"
}
}
\ No newline at end of file
numpy>=1.19.5
pandas>=1.2.1
torch==1.7.1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment