Skip to content
Snippets Groups Projects
Commit 3e7da284 authored by Pierre LOTTE's avatar Pierre LOTTE
Browse files

Add lstm_ad

parent c218c0e6
No related branches found
No related tags found
No related merge requests found
# Base image: TimeEval's Python 3 + PyTorch runtime (provides the algorithm entrypoint wrapper).
FROM ghcr.io/timeeval/python3-torch:0.3.0
LABEL maintainer="phillip.wenig@hpi.de"
LABEL org.opencontainers.image.licenses=MIT
# Script the TimeEval base image executes as the algorithm's main entry point.
ENV ALGORITHM_MAIN="/app/algorithm.py"
# install algorithm dependencies
COPY requirements.txt /app/
RUN pip install -r /app/requirements.txt
# Copy the model package, manifest, and entry script into the image.
COPY lstm_ad /app/lstm_ad
COPY manifest.json /app/
COPY algorithm.py /app/
MIT License
Copyright (c) 2020-2022 Phillip Wenig and Sebastian Schmidl
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
# LSTM-AD
|||
| :--- | :--- |
| Citekey | MalhotraEtAl2015Long |
| Source code | `own` |
| Learning type | semi-supervised |
| Input dimensionality | multivariate |
|||
## Dependencies
- python 3
- pytorch
## Notes
LSTM-AD outputs anomaly scores for windows.
The results require post-processing.
The scores for each point can be assigned by aggregating the anomaly scores for each window the point is included in.
You can use the following code snippet for the post-processing step in TimeEval (default parameters directly filled in from the source code):
<!--BEGIN:timeeval-post-->
```python
import numpy as np
from timeeval.utils.window import ReverseWindowing
# post-processing for LSTM-AD
def post_lstm_ad(scores: np.ndarray, args: dict) -> np.ndarray:
window_size = args.get("hyper_params", {}).get("window_size", 30)
prediction_window_size = args.get("hyper_params", {}).get("prediction_window_size", 1)
return ReverseWindowing(window_size=window_size + prediction_window_size).fit_transform(scores)
```
<!--END:timeeval-post-->
import argparse
from dataclasses import dataclass, asdict, field
import json
import numpy as np
import pandas as pd
import sys
from typing import List
from lstm_ad.model import LSTMAD
@dataclass
class CustomParameters:
    """Hyper-parameters for LSTM-AD; names and defaults mirror manifest.json."""
    lstm_layers: int = 2  # number of stacked LSTM layers
    split: float = 0.9  # train/validation split used for early stopping
    window_size: int = 30  # length of the input sliding window
    prediction_window_size: int = 1  # number of points predicted per window
    # Extra output dimensions; empty list presumably falls back to the model's
    # default — TODO confirm against lstm_ad.model.LSTMAD
    output_dims: List[int] = field(default_factory=lambda: [])
    batch_size: int = 32  # instances trained at the same time
    validation_batch_size: int = 128  # instances validated at the same time
    test_batch_size: int = 128  # instances scored at the same time during execute
    epochs: int = 50  # bigger for smaller datasets, smaller for bigger datasets
    early_stopping_delta: float = 0.05  # stop if 1 - (loss / last_loss) < delta for `patience` epochs
    early_stopping_patience: int = 10  # epochs tolerated below `delta` before stopping
    optimizer: str = "adam"  # not exposed, always use Adam!
    learning_rate: float = 1e-3  # learning rate for the Adam optimizer
    random_state: int = 42  # seed for Python/NumPy/PyTorch RNGs
class AlgorithmArgs(argparse.Namespace):
    """Typed view over the TimeEval JSON arguments passed as sys.argv[1]."""

    @property
    def ts(self) -> np.ndarray:
        """Raw time-series values: drops the first (index) and last (label) columns."""
        return self.df.iloc[:, 1:-1].values

    @property
    def df(self) -> pd.DataFrame:
        """Load the dataset from the path supplied in `dataInput`."""
        return pd.read_csv(self.dataInput)

    @staticmethod
    def from_sys_args() -> 'AlgorithmArgs':
        """Parse sys.argv[1] as JSON and build an AlgorithmArgs.

        Unknown custom-parameter keys are silently dropped so extra keys in
        the TimeEval config cannot crash `CustomParameters(**...)`.
        """
        args: dict = json.loads(sys.argv[1])
        # asdict() yields exactly the declared dataclass fields. The previous
        # dir()-based check also matched methods and dunders, so bogus keys
        # such as "__init__" would have passed the filter and crashed the
        # constructor below.
        custom_parameter_keys = asdict(CustomParameters()).keys()
        filtered_parameters = {
            key: value
            for key, value in args.get("customParameters", {}).items()
            if key in custom_parameter_keys
        }
        args["customParameters"] = CustomParameters(**filtered_parameters)
        return AlgorithmArgs(**args)
def train(args: AlgorithmArgs):
    """Fit an LSTM-AD model on the input series and persist it to `modelOutput`."""
    series = args.ts
    hyper_params = asdict(args.customParameters)
    model = LSTMAD(input_size=series.shape[1], **hyper_params)
    # fit() also receives the model path — presumably for checkpointing during
    # training — TODO confirm against lstm_ad.model.LSTMAD.fit
    model.fit(series, args.modelOutput)
    model.save(args.modelOutput)
def execute(args: AlgorithmArgs):
    """Score the input series with a previously trained model; write scores to `dataOutput`."""
    series = args.ts
    hyper_params = asdict(args.customParameters)
    model = LSTMAD.load(args.modelInput, input_size=series.shape[1], **hyper_params)
    scores = model.anomaly_detection(series)
    # One score per line; TimeEval reads this plain-text file back in.
    scores.tofile(args.dataOutput, sep="\n")
def set_random_state(config: AlgorithmArgs) -> None:
seed = config.customParameters.random_state
import random, torch
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if __name__ == "__main__":
    args = AlgorithmArgs.from_sys_args()
    set_random_state(args)
    # Dispatch on the executionType requested by TimeEval.
    actions = {"train": train, "execute": execute}
    action = actions.get(args.executionType)
    if action is None:
        raise ValueError(f"No executionType '{args.executionType}' available! Choose either 'train' or 'execute'.")
    action(args)
{
"title": "LSTM-AD",
"description": "Implementation of https://www.elen.ucl.ac.be/Proceedings/esann/esannpdf/es2015-56.pdf",
"inputDimensionality": "multivariate",
"version": "0.3.0",
"authors": "Pankaj Malhotra, Lovekesh Vig, Gautam Shroff, Puneet Agarwal",
"language": "Python",
"type": "Detector",
"mainFile": "algorithm.py",
"learningType": "semi-supervised",
"trainingStep": {
"parameters": [
{
"name": "lstm_layers",
"type": "int",
"defaultValue": 2,
"optional": "true",
"description": "Number of stacked LSTM layers"
},
{
"name": "split",
"type": "float",
"defaultValue": 0.9,
"optional": "true",
"description": "Train-validation split for early stopping"
},
{
"name": "window_size",
"type": "int",
"defaultValue": 30,
"optional": "true",
"description": ""
},
{
"name": "prediction_window_size",
"type": "int",
"defaultValue": 1,
"optional": "true",
"description": "Number of points predicted"
},
{
"name": "batch_size",
"type": "int",
"defaultValue": 32,
"optional": "true",
"description": "Number of instances trained at the same time"
},
{
"name": "validation_batch_size",
"type": "int",
"defaultValue": 128,
"optional": "true",
"description": "Number of instances used for validation at the same time"
},
{
"name": "epochs",
"type": "int",
"defaultValue": 50,
"optional": "true",
"description": "Number of training iterations over entire dataset"
},
{
"name": "early_stopping_delta",
"type": "float",
"defaultValue": 0.05,
"optional": "true",
"description": "If 1 - (loss / last_loss) is less than `delta` for `patience` epochs, stop"
},
{
"name": "early_stopping_patience",
"type": "int",
"defaultValue": 10,
"optional": "true",
"description": "If 1 - (loss / last_loss) is less than `delta` for `patience` epochs, stop"
},
{
"name": "learning_rate",
"type": "float",
"defaultValue": 0.001,
"optional": "true",
"description": "Learning rate for Adam optimizer"
},
{
"name": "random_state",
"type": "int",
"defaultValue": 42,
"optional": "true",
"description": "Seed for the random number generator"
}
],
"modelInput": "none"
},
"executionStep": {
"parameters": [
{
"name": "lstm_layers",
"type": "int",
"defaultValue": 2,
"optional": "true",
"description": "Number of stacked LSTM layers"
},
{
"name": "window_size",
"type": "int",
"defaultValue": 30,
"optional": "true",
"description": "Size of the sliding windows"
},
{
"name": "prediction_window_size",
"type": "int",
"defaultValue": 1,
"optional": "true",
"description": "Number of points predicted"
},
{
"name": "test_batch_size",
"type": "int",
"defaultValue": 128,
"optional": "true",
"description": "Number of instances used for testing at the same time"
},
{
"name": "random_state",
"type": "int",
"defaultValue": 42,
"optional": "true",
"description": "Seed for the random number generator"
}
],
"modelInput": "required"
}
}
\ No newline at end of file
numpy>=1.19.5
pandas>=1.2.1
torch==1.7.1
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment