model_pytorch
v1.0.0
PyTorch Autoencoder for reconstruction-error-based anomaly detection. Uses an encoder-decoder architecture with ReLU activations and MSE loss, trained with the Adam optimizer.
$ openuba install model_pytorch
Runtime: pytorch
License: Apache-2.0
Tags: autoencoder, deep-learning, reconstruction-error, pytorch, neural-network
Parameters
| Name | Type | Default | Description |
|---|---|---|---|
| learning_rate | float | 0.001 | Learning rate |
| epochs | integer | 10 | Number of training epochs |
model.yaml
```yaml
name: model_pytorch
version: 1.0.0
runtime: pytorch
description: PyTorch Autoencoder
parameters:
  learning_rate:
    type: float
    default: 0.001
    description: Learning rate
  epochs:
    type: integer
    default: 10
    description: Number of training epochs
```
MODEL.py
```python
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from typing import Dict, Any


class Autoencoder(nn.Module):
    """Symmetric encoder-decoder; reconstruction error drives the anomaly score."""

    def __init__(self, input_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU()
        )
        self.decoder = nn.Sequential(
            nn.Linear(8, 16),
            nn.ReLU(),
            nn.Linear(16, input_dim)
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded

class Model:
    def __init__(self):
        self.model = None
        self.input_dim = 10  # default fallback

    def train(self, ctx) -> Dict[str, Any]:
        """
        Train the PyTorch Autoencoder.
        """
        ctx.logger.info("Starting PyTorch Autoencoder training...")

        # Data prep: numeric columns from the context DataFrame, or dummy data
        if ctx.df is None or ctx.df.empty:
            ctx.logger.warning("No data provided, generating dummy data")
            X = np.random.randn(100, 10).astype(np.float32)
        else:
            X = ctx.df.select_dtypes(include=[np.number]).values.astype(np.float32)

        self.input_dim = X.shape[1]
        self.model = Autoencoder(self.input_dim)

        # Honor the parameters declared in model.yaml if the runtime exposes
        # them on the context; otherwise fall back to the declared defaults.
        params = getattr(ctx, "params", None) or {}
        lr = float(params.get("learning_rate", 0.001))
        epochs = int(params.get("epochs", 10))

        criterion = nn.MSELoss()
        optimizer = optim.Adam(self.model.parameters(), lr=lr)

        # Full-batch training loop
        dataset = torch.tensor(X)
        self.model.train()

        loss_val = 0.0
        for epoch in range(epochs):
            optimizer.zero_grad()
            outputs = self.model(dataset)
            loss = criterion(outputs, dataset)
            loss.backward()
            optimizer.step()
            loss_val = loss.item()

        ctx.logger.info(f"Training completed. Final Loss: {loss_val}")

        # Save state (in memory for this instance; usually would save to disk)
        # torch.save(self.model.state_dict(), "model.pth")

        return {
            "status": "success",
            "model_type": "PyTorch Autoencoder",
            "final_loss": float(loss_val),
            "input_dim": self.input_dim
        }

    def infer(self, ctx) -> pd.DataFrame:
        """
        Inference: compute reconstruction error as the anomaly score.
        """
        ctx.logger.info("Starting PyTorch inference...")

        if ctx.df is None or ctx.df.empty:
            X = np.random.randn(20, self.input_dim).astype(np.float32)
            ids = [f"user_{i}" for i in range(20)]
        else:
            X = ctx.df.select_dtypes(include=[np.number]).values.astype(np.float32)
            # Handle dimension mismatch if inference data differs from training shape
            if X.shape[1] != self.input_dim:
                ctx.logger.warning(f"Dim mismatch: expected {self.input_dim}, got {X.shape[1]}. Truncating/padding.")
                if X.shape[1] > self.input_dim:
                    X = X[:, :self.input_dim]
                else:
                    padding = np.zeros((X.shape[0], self.input_dim - X.shape[1]), dtype=np.float32)
                    X = np.hstack((X, padding))

            if "entity_id" in ctx.df.columns:
                ids = ctx.df["entity_id"].values
            else:
                ids = [f"entity_{i}" for i in range(len(X))]

        # Instantiate with random weights if infer() is called before train()
        if self.model is None:
            self.model = Autoencoder(self.input_dim)
        self.model.eval()

        with torch.no_grad():
            inputs = torch.tensor(X)
            outputs = self.model(inputs)
            mse = torch.mean((inputs - outputs) ** 2, dim=1).numpy()

        results = []
        for i, score in enumerate(mse):
            # Higher reconstruction error = higher anomaly risk.
            # Map MSE in [0.0, 2.0] onto a 0-100 risk score for the demo.
            risk = min(100.0, float(score) * 50)

            results.append({
                "entity_id": str(ids[i]),
                "risk_score": float(risk),
                "anomaly_type": "reconstruction_error" if risk > 50 else "normal",
                "details": {"mse": float(score)}
            })

        return pd.DataFrame(results)

    def execute(self, data=None):
        # Compatibility shim for the v1 interface: wrap the payload in a minimal context.
        class MockCtx:
            def __init__(self, d):
                # `if d:` raises on a DataFrame, so test for None explicitly
                self.df = d if d is not None else pd.DataFrame()
                self.logger = type("Logger", (), {"info": staticmethod(print), "warning": staticmethod(print)})()

        df = pd.DataFrame(data) if data is not None else None
        return self.infer(MockCtx(df)).to_dict("records")
```
137