diff --git a/subject1-4/.keep b/subject1-4/.keep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/subject1-4/AdaDiff/__pycache__/denoising_diffusion_pytorch_1d.cpython-38.pyc b/subject1-4/AdaDiff/__pycache__/denoising_diffusion_pytorch_1d.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd767cc1f05e0a812322e7c3b3ff2dfcdc91cccb Binary files /dev/null and b/subject1-4/AdaDiff/__pycache__/denoising_diffusion_pytorch_1d.cpython-38.pyc differ diff --git a/subject1-4/AdaDiff/__pycache__/diffusion_module2.cpython-38.pyc b/subject1-4/AdaDiff/__pycache__/diffusion_module2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f1062ad31b5404d9a9a1c3911e5d1b2988129e27 Binary files /dev/null and b/subject1-4/AdaDiff/__pycache__/diffusion_module2.cpython-38.pyc differ diff --git a/subject1-4/AdaDiff/__pycache__/models2.cpython-38.pyc b/subject1-4/AdaDiff/__pycache__/models2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b258adaeb142817db9045816fba314a6537b315c Binary files /dev/null and b/subject1-4/AdaDiff/__pycache__/models2.cpython-38.pyc differ diff --git a/subject1-4/AdaDiff/__pycache__/train_diffusion_val.cpython-311.pyc b/subject1-4/AdaDiff/__pycache__/train_diffusion_val.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..989f09cd914b8bd7802aa50332df0a18aea74d48 Binary files /dev/null and b/subject1-4/AdaDiff/__pycache__/train_diffusion_val.cpython-311.pyc differ diff --git a/subject1-4/AdaDiff/__pycache__/train_diffusion_val.cpython-38.pyc b/subject1-4/AdaDiff/__pycache__/train_diffusion_val.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a5fd163f8f052e9584ed87e1857a78a32e654070 Binary files /dev/null and b/subject1-4/AdaDiff/__pycache__/train_diffusion_val.cpython-38.pyc differ diff --git 
a/subject1-4/AdaDiff/__pycache__/unet2.cpython-38.pyc b/subject1-4/AdaDiff/__pycache__/unet2.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a96c76683a12e959d5fd14e41cd2d3fe5f74710 Binary files /dev/null and b/subject1-4/AdaDiff/__pycache__/unet2.cpython-38.pyc differ diff --git a/subject1-4/AdaDiff/client.py b/subject1-4/AdaDiff/client.py new file mode 100644 index 0000000000000000000000000000000000000000..f1412b9377d0335b7f49145310f226307ceb2409 --- /dev/null +++ b/subject1-4/AdaDiff/client.py @@ -0,0 +1,203 @@ +import socket +import threading +import time +from queue import Queue +import train_diffusion_val +import torch +from torch.utils.data import Dataset, DataLoader, TensorDataset +import torch.nn as nn +import numpy as np +import pickle +import random +class EdgeNode: + def __init__(self, server_address, data_set, model): + # self.node_id = node_id + # self.data_buffer = [] + # self.data_stream = [] + self.ratio = 0.6 + self.model = model + self.server_address = server_address + self.data_set = data_set + self.data_index = 0 + self.lock = threading.Lock() + self.is_running = True # 用于控制数据流读取的循环 + self.results = [] + self.time = 0 + + def read_data_stream(self): + while self.is_running: + time.sleep(2) + new_data_point = self.generate_data_point() + # self.data_stream.append(new_data_point) + + with self.lock: + print(f"Processing data: ") + # 创建新线程处理数据 + self.process_data(new_data_point) + self.data_index+=1 + # if len(self.data_buffer) == 3: + # print(f"Node {self.node_id} - Processing data: {self.data_buffer}") + # # 创建新线程处理数据 + # processing_thread = threading.Thread(target=self.process_data, args=(self.data_buffer,)) + # processing_thread.start() + + # # 移动窗口,准备接收下一个数据点 + # # self.data_buffer = self.data_buffer[1:] + + def generate_data_point(self): + # 按顺序从数据集中获取数据点 + if(self.data_index == np.shape(self.data_set)[0] - 1): + self.is_running = False + # data_point = self.data_set[self.data_index] + return 
self.data_set[self.data_index].unsqueeze(0) + + def process_data(self, data_fragment): + # import time + startTime = time.time() + noised_series = self.noising(data_fragment) + + if(random.randint(1,100) < 10): + print("网络故障,本地计算") + self.results.append(noised_series) + endTime = time.time() + print("耗时", endTime - startTime) + self.time += endTime - startTime + return + + print(f"将加噪后的时序传输给服务器:") + # 异常检测到,将数据片段、异常数据点的ID和边缘节点ID上报给中心服务器 + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket: + client_socket.connect(self.server_address) + + send_dict = {"ratio":self.ratio,"data":noised_series} + + serialized_data = pickle.dumps(send_dict) + chunk_size = 1024 + + client_socket.sendall(str(len(serialized_data)).encode()) + point = client_socket.recv(1024) + print("收到信号", point.decode()) + print(len(serialized_data)) + for i in range(0, len(serialized_data), chunk_size): + chunk = serialized_data[i:i+chunk_size] + client_socket.sendall(chunk) + # client_socket.sendall(data_to_send) + + print("等待接受服务端数据:") + length = int(client_socket.recv(1024).decode()) + + print("收到长度",length) + client_socket.sendall('go'.encode()) + serialized_data = b'' + while True: + chunk = client_socket.recv(1024) # 接收数据块(这里假设每次接收1KB) + + serialized_data += chunk + if len(serialized_data) == length: # 如果接收到的数据为空,表示传输完毕 + break + # print(len(serialized_data)) + deserialized_data = pickle.loads(serialized_data) + # print(deserialized_data) + print(len(serialized_data)) + self.results.append(deserialized_data) + endTime = time.time() + print("耗时", endTime - startTime) + self.time += endTime - startTime + + def noising(self, data): + return self.model(data, 0, int(self.model.denoise_steps*self.ratio))[1].transpose(2,1) + + +if __name__ == "__main__": + server_address = ('localhost', 8892) # 中心服务器地址和端口 + + training_mode = "diffusion" + lr = 1e-3 + window_size = 128 + p1 = 1 + p2 = 1 + dataset_name = "point_global" + batch_size = 32 + noise_steps = 100 + denoise_steps = 50 + 
diff_lambda = 0.1 + part = None + device = "cuda" + + experiment = f'diffv4_{dataset_name}_{noise_steps}-{denoise_steps}_{diff_lambda}_1e-3_{batch_size}_{window_size}' + + train_loader, test_loader, validation_loader, labels, validation_labels = train_diffusion_val.load_dataset(dataset_name, part) + + model, diffusion_training_net, diffusion_prediction_net, optimizer, scheduler = \ + train_diffusion_val.load_model(training_mode ,lr, window_size, p1, p2, labels.shape[1], batch_size, noise_steps, denoise_steps) + model, diffusion_training_net = train_diffusion_val.load_from_checkpoint(training_mode, experiment, model, diffusion_training_net) + diffusion_training_net = diffusion_training_net.to(device) + diffusion_prediction_net = diffusion_prediction_net.to(device) + + diffusion_prediction_net.load_state_dict(diffusion_training_net.state_dict()) + diffusion_prediction_net.eval() + diffusion_training_net.eval() + + trainD, testD, validationD = next(iter(train_loader)), next(iter(test_loader)), next(iter(validation_loader)) + testD = train_diffusion_val.convert_to_windows(testD, window_size) + print(np.shape(testD)) + data_x = torch.tensor(testD, dtype=torch.float32) + data_x = data_x.to(device) + # dataset = TensorDataset(data_x, data_x) + # dataloader = DataLoader(dataset, batch_size = batch_size) + + # STime = time.time() + # l1s = [] + # feats=labels.shape[1] + # for window, _ in dataloader: + # window = window.to(device) + # _, x_recon = diffusion_prediction_net(window,0,45) + # _, x_recon = diffusion_prediction_net(window,45,50) + # x_recon = x_recon.transpose(2,1) + # l = nn.MSELoss(reduction = 'none') + # loss = l(x_recon, window) + # l1s.append(loss) + # ETime = time.time() + # loss0 = torch.cat(l1s).detach().cpu().numpy() + # loss0 = loss0.reshape(-1,feats) + + # lossFinal = np.mean(np.array(loss0), axis=1) + # labelsFinal = (np.sum(labels, axis=1) >= 1) + 0 + # validation_thresh = 0 + # result, fprs, tprs = train_diffusion_val.evaluate(lossFinal, 
labelsFinal, validation_thresh=validation_thresh) + # result_roc = result["ROC/AUC"] + # result_f1 = result["f1"] + + # print(result, ETime - STime) + + edge_node = EdgeNode(server_address=server_address, data_set=data_x, model = diffusion_prediction_net) + edge_node.read_data_stream() + # print(torch.stack(edge_node.results)) + denoised_data = torch.stack(edge_node.results).squeeze(1) + print(edge_node.time) + # 边缘节点线程结束后,自动关闭客户端 + print(f"Client closed.") + # print(np.shape(data_x)) + dataset = TensorDataset(data_x, denoised_data) + dataloader = DataLoader(dataset, batch_size = batch_size) + + l1s = [] + feats=labels.shape[1] + for raw, window in dataloader: + window = window.to(device) + l = nn.MSELoss(reduction = 'none') + loss = l(raw, window) + l1s.append(loss) + loss0 = torch.cat(l1s).detach().cpu().numpy() + loss0 = loss0.reshape(-1,feats) + + print(np.shape(loss0)) + lossFinal = np.mean(np.array(loss0), axis=1) + labelsFinal = (np.sum(labels, axis=1) >= 1) + 0 + validation_thresh = 0 + result, fprs, tprs = train_diffusion_val.evaluate(lossFinal, labelsFinal, validation_thresh=validation_thresh) + result_roc = result["ROC/AUC"] + result_f1 = result["f1"] + print("ROC",result_roc) + print("f1", result_f1) + print("time",edge_node.time / np.shape(denoised_data)[0]) \ No newline at end of file diff --git a/subject1-4/AdaDiff/data/preprocess_smap_msl.ipynb b/subject1-4/AdaDiff/data/preprocess_smap_msl.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..768a0d5934d45b8b6d884ac9eb393754aab9f4fe --- /dev/null +++ b/subject1-4/AdaDiff/data/preprocess_smap_msl.ipynb @@ -0,0 +1,1261 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/mts/SMAP_MSL/data\n" + ] + } + ], + "source": [ + "%cd '../../../../mts/SMAP_MSL/data'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + "'/mts/SMAP_MSL/data'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "train_folder = '../../../../mts/SMAP_MSL/data/train/'\n", + "test_folder = '../../../../mts/SMAP_MSL/data/test/'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "labeled_anomalies = pd.read_csv('../../../../mts/SMAP_MSL/data/labeled_anomalies.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chan_idspacecraftanomaly_sequencesclassnum_values
0P-1SMAP[[2149, 2349], [4536, 4844], [3539, 3779]][contextual, contextual, contextual]8505
1S-1SMAP[[5300, 5747]][point]7331
2E-1SMAP[[5000, 5030], [5610, 6086]][contextual, contextual]8516
3E-2SMAP[[5598, 6995]][point]8532
4E-3SMAP[[5094, 8306]][point]8307
\n", + "
" + ], + "text/plain": [ + " chan_id spacecraft anomaly_sequences \\\n", + "0 P-1 SMAP [[2149, 2349], [4536, 4844], [3539, 3779]] \n", + "1 S-1 SMAP [[5300, 5747]] \n", + "2 E-1 SMAP [[5000, 5030], [5610, 6086]] \n", + "3 E-2 SMAP [[5598, 6995]] \n", + "4 E-3 SMAP [[5094, 8306]] \n", + "\n", + " class num_values \n", + "0 [contextual, contextual, contextual] 8505 \n", + "1 [point] 7331 \n", + "2 [contextual, contextual] 8516 \n", + "3 [point] 8532 \n", + "4 [point] 8307 " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labeled_anomalies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "random_file = np.load(test_folder + 'A-1.npy')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/.conda/envs/py3.9test/lib/python3.9/site-packages/scipy/__init__.py:138: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.1)\n", + " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion} is required for this version of \"\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "def scale_data(train, test):\n", + " scaler = MinMaxScaler(feature_range=(0, 1), clip=True).fit(train)\n", + "\n", + " train_scaled = scaler.transform(train)\n", + " test_scaled = scaler.transform(test)\n", + "\n", + " return train_scaled, test_scaled" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chan_idspacecraftanomaly_sequencesclassnum_values
0P-1SMAP[[2149, 2349], [4536, 4844], [3539, 3779]][contextual, contextual, contextual]8505
1S-1SMAP[[5300, 5747]][point]7331
2E-1SMAP[[5000, 5030], [5610, 6086]][contextual, contextual]8516
3E-2SMAP[[5598, 6995]][point]8532
4E-3SMAP[[5094, 8306]][point]8307
\n", + "
" + ], + "text/plain": [ + " chan_id spacecraft anomaly_sequences \\\n", + "0 P-1 SMAP [[2149, 2349], [4536, 4844], [3539, 3779]] \n", + "1 S-1 SMAP [[5300, 5747]] \n", + "2 E-1 SMAP [[5000, 5030], [5610, 6086]] \n", + "3 E-2 SMAP [[5598, 6995]] \n", + "4 E-3 SMAP [[5094, 8306]] \n", + "\n", + " class num_values \n", + "0 [contextual, contextual, contextual] 8505 \n", + "1 [point] 7331 \n", + "2 [contextual, contextual] 8516 \n", + "3 [point] 8532 \n", + "4 [point] 8307 " + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smap = labeled_anomalies[labeled_anomalies['spacecraft'] == 'SMAP']\n", + "smap.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chan_idspacecraftanomaly_sequencesclassnum_values
33T-1SMAP[[2399, 3898], [6550, 6585]][point, contextual]8612
\n", + "
" + ], + "text/plain": [ + " chan_id spacecraft anomaly_sequences class \\\n", + "33 T-1 SMAP [[2399, 3898], [6550, 6585]] [point, contextual] \n", + "\n", + " num_values \n", + "33 8612 " + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "smap[smap['chan_id'] == 'T-1']" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
chan_idspacecraftanomaly_sequencesclassnum_values
55M-6MSL[[1850, 2030]][point]2049
56M-1MSL[[1110, 2250]][contextual]2277
57M-2MSL[[1110, 2250]][contextual]2277
58S-2MSL[[900, 910]][point]1827
59P-10MSL[[4590, 4720]][point]6100
\n", + "
" + ], + "text/plain": [ + " chan_id spacecraft anomaly_sequences class num_values\n", + "55 M-6 MSL [[1850, 2030]] [point] 2049\n", + "56 M-1 MSL [[1110, 2250]] [contextual] 2277\n", + "57 M-2 MSL [[1110, 2250]] [contextual] 2277\n", + "58 S-2 MSL [[900, 910]] [point] 1827\n", + "59 P-10 MSL [[4590, 4720]] [point] 6100" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "msl = labeled_anomalies[labeled_anomalies['spacecraft'] == 'MSL']\n", + "msl.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'msl' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/root/Diff-Anomaly/TranAD/data/preprocess_smap_msl.ipynb Cell 12\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m msl[msl[\u001b[39m'\u001b[39m\u001b[39mchan_id\u001b[39m\u001b[39m'\u001b[39m] \u001b[39m==\u001b[39m \u001b[39m'\u001b[39m\u001b[39mT-1\u001b[39m\u001b[39m'\u001b[39m]\n", + "\u001b[0;31mNameError\u001b[0m: name 'msl' is not defined" + ] + } + ], + "source": [ + "msl[msl['chan_id'] == 'T-1']" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "27\n", + "['C-1', 'C-2', 'D-14', 'D-15', 'D-16', 'F-4', 'F-5', 'F-7', 'F-8', 'M-1', 'M-2', 'M-3', 'M-4', 'M-5', 'M-6', 'M-7', 'P-10', 'P-11', 'P-14', 'P-15', 'S-2', 'T-12', 'T-13', 'T-4', 'T-5', 'T-8', 'T-9']\n" + ] + } + ], + "source": [ + "# smap_files = smap['chan_id'].values\n", + "msl_files = msl['chan_id'].values\n", + "# print(len(smap_files))\n", + "print(len(msl_files))\n", + "# print(smap_files)\n", + "print(sorted(msl_files))" + ] + }, + { + "cell_type": "code", + 
"execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original range for P-1: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for P-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for S-1: [-0.4 1.0], [-0.4 1.0]\n", + "Scaled range for S-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-1: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for E-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-2: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for E-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-3: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for E-3: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-4: [-0.9999999999999998 1.0000000000000004], [-0.9999999999999998 1.0000000000000004]\n", + "Scaled range for E-4: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-5: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for E-5: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-6: [0.0 1.0], [0.0 1.0]\n", + "Scaled range for E-6: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-7: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for E-7: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-8: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for E-8: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-9: [-1.0 1.0000000000000018], [-1.0 1.0000000000000018]\n", + "Scaled range for E-9: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-10: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for E-10: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-11: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for E-11: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-12: [-1.0 1.0000000000000002], [-1.0 1.0000000000000002]\n", + "Scaled range for E-12: [0.0 1.0], [0.0 1.0]\n", + "Original range for E-13: [-1.0 1.0000000000000002], [-1.0 1.0000000000000002]\n", + "Scaled range for E-13: [0.0 1.0], [0.0 1.0]\n", + "Original range for A-1: [0.0 1.0], [0.0 1.0]\n", + "Scaled range for A-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-1: [-1.0 1.0], [-1.0 1.0]\n", + 
"Scaled range for D-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for P-2: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for P-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for P-3: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for P-3: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-2: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for D-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-3: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for D-3: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-4: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for D-4: [0.0 1.0], [0.0 1.0]\n", + "Original range for A-2: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for A-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for A-3: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for A-3: [0.0 1.0], [0.0 1.0]\n", + "Original range for A-4: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for A-4: [0.0 1.0], [0.0 1.0]\n", + "Original range for G-1: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for G-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for G-2: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for G-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-5: [0.0 1.0], [0.0 1.0]\n", + "Scaled range for D-5: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-6: [-0.9999999999999999 1.0], [-0.9999999999999999 1.0]\n", + "Scaled range for D-6: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-7: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for D-7: [0.0 1.0], [0.0 1.0]\n", + "Original range for F-1: [-0.34 1.0], [-0.34 1.0]\n", + "Scaled range for F-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for P-4: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for P-4: [0.0 1.0], [0.0 1.0]\n", + "Original range for G-3: [-0.9999999999999998 1.0], [-0.9999999999999998 1.0]\n", + "Scaled range for G-3: [0.0 1.0], [0.0 1.0]\n", + "Original range for T-1: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for T-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for T-2: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for T-2: [0.0 1.0], [0.0 1.0]\n", + "Original range 
for D-8: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for D-8: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-9: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for D-9: [0.0 1.0], [0.0 1.0]\n", + "Original range for F-2: [-0.54 1.0], [-0.54 1.0]\n", + "Scaled range for F-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for G-4: [0.0 1.0], [0.0 1.0]\n", + "Scaled range for G-4: [0.0 1.0], [0.0 1.0]\n", + "Original range for T-3: [0.0 1.0], [0.0 1.0]\n", + "Scaled range for T-3: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-11: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for D-11: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-12: [-1.0 0.0], [-1.0 0.0]\n", + "Scaled range for D-12: [0.0 0.0], [0.0 1.0]\n", + "Original range for B-1: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for B-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for G-6: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for G-6: [0.0 1.0], [0.0 1.0]\n", + "Original range for G-7: [-0.9999999999999998 1.0], [-0.9999999999999998 1.0]\n", + "Scaled range for G-7: [0.0 1.0], [0.0 1.0]\n", + "Original range for P-7: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for P-7: [0.0 1.0], [0.0 1.0]\n", + "Original range for R-1: [0.0 1.0], [0.0 1.0]\n", + "Scaled range for R-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for A-5: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for A-5: [0.0 1.0], [0.0 1.0]\n", + "Original range for A-6: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for A-6: [0.0 1.0], [0.0 0.9999999999999999]\n", + "Original range for A-7: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for A-7: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-13: [-1.0 0.0], [-1.0 0.0]\n", + "Scaled range for D-13: [0.0 0.0], [0.0 1.0]\n", + "Original range for P-2: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for P-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for A-8: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for A-8: [0.0 1.0], [0.0 1.0]\n", + "Original range for A-9: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for A-9: [0.0 1.0], [0.0 
1.0]\n", + "Original range for F-3: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for F-3: [0.0 1.0], [0.0 1.0]\n" + ] + } + ], + "source": [ + "for file in smap_files:\n", + " # load\n", + " train_original = np.load(train_folder + f'{file}.npy')\n", + " test_original = np.load(test_folder + f'{file}.npy')\n", + " print(f'Original range for {file}: [{train_original.min()} {train_original.max()}], [{train_original.min()} {train_original.max()}]')\n", + " # scale\n", + " train_scaled, test_scaled = scale_data(train_original, test_original)\n", + " print(f'Scaled range for {file}: [{train_scaled.min()} {train_scaled.max()}], [{test_scaled.min()} {test_scaled.max()}]')\n", + " # build labels\n", + " labels = np.zeros_like(test_scaled)\n", + " indices = smap[smap['chan_id'] == file]['anomaly_sequences'].values[0]\n", + " indices = indices.replace(']', '').replace('[', '').split(', ')\n", + " indices = [int(i) for i in indices]\n", + " for i in range(0, len(indices), 2):\n", + " labels[indices[i]:indices[i+1], :] = 1\n", + " # save\n", + " dir = '../../TranAD/processed/SMAP'\n", + " os.makedirs(dir, exist_ok=True)\n", + " np.save(f'{dir}/{file}_train.npy', train_scaled)\n", + " np.save(f'{dir}/{file}_test.npy', test_scaled)\n", + " np.save(f'{dir}/{file}_labels.npy', labels) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Original range for M-6: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for M-6: [0.0 1.0], [0.0 1.0]\n", + "Original range for M-1: [-0.9160935512741932 2.4922982712327473], [-0.9160935512741932 2.4922982712327473]\n", + "Scaled range for M-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for M-2: [-1.210726170949998 1.0], [-1.210726170949998 1.0]\n", + "Scaled range for M-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for S-2: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for S-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for P-10: [0.0 
1.001129464915996], [0.0 1.001129464915996]\n", + "Scaled range for P-10: [0.0 1.0], [0.0 1.0]\n", + "Original range for T-4: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for T-4: [0.0 1.0], [0.0 1.0]\n", + "Original range for T-5: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for T-5: [0.0 1.0], [0.0 1.0]\n", + "Original range for F-7: [-0.9999999999999998 1.0], [-0.9999999999999998 1.0]\n", + "Scaled range for F-7: [0.0 1.0], [0.0 1.0]\n", + "Original range for M-3: [-1.47721668720188 1.000070758018348], [-1.47721668720188 1.000070758018348]\n", + "Scaled range for M-3: [0.0 1.0], [0.0 1.0]\n", + "Original range for M-4: [-1.4654640190905774 1.00000547321409], [-1.4654640190905774 1.00000547321409]\n", + "Scaled range for M-4: [0.0 1.0], [0.0 1.0]\n", + "Original range for M-5: [-1.2550059949886205 1.0], [-1.2550059949886205 1.0]\n", + "Scaled range for M-5: [0.0 1.0], [0.0 1.0]\n", + "Original range for P-15: [0.0 1.0052196607220525], [0.0 1.0052196607220525]\n", + "Scaled range for P-15: [0.0 1.0], [0.0 1.0]\n", + "Original range for C-1: [-1.0 2.1934477379095165], [-1.0 2.1934477379095165]\n", + "Scaled range for C-1: [0.0 1.0], [0.0 1.0]\n", + "Original range for C-2: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for C-2: [0.0 1.0], [0.0 1.0]\n", + "Original range for T-12: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for T-12: [0.0 1.0], [0.0 1.0]\n", + "Original range for T-13: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for T-13: [0.0 1.0], [0.0 1.0]\n", + "Original range for F-4: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for F-4: [0.0 1.0], [0.0 1.0]\n", + "Original range for F-5: [-1.1163775338154294 4.162651279553374], [-1.1163775338154294 4.162651279553374]\n", + "Scaled range for F-5: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-14: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for D-14: [0.0 1.0], [0.0 1.0]\n", + "Original range for T-9: [-1.0 1.0], [-1.0 1.0]\n", + "Scaled range for T-9: [0.0 1.0], [0.0 1.0]\n", + "Original range for P-14: [0.0 1.0], [0.0 
1.0]\n", + "Scaled range for P-14: [0.0 1.0], [0.0 1.0]\n", + "Original range for T-8: [-1.0 1.0294117647058822], [-1.0 1.0294117647058822]\n", + "Scaled range for T-8: [0.0 1.0], [0.0 1.0]\n", + "Original range for P-11: [0.0 1.0], [0.0 1.0]\n", + "Scaled range for P-11: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-15: [-1.0 1.1915779731605738], [-1.0 1.1915779731605738]\n", + "Scaled range for D-15: [0.0 1.0], [0.0 1.0]\n", + "Original range for D-16: [-1.0 1.008879901529805], [-1.0 1.008879901529805]\n", + "Scaled range for D-16: [0.0 1.0], [0.0 1.0]\n", + "Original range for M-7: [-1.0020241085789672 1.0], [-1.0020241085789672 1.0]\n", + "Scaled range for M-7: [0.0 1.0], [0.0 1.0]\n", + "Original range for F-8: [-1.0 1.1304347826086958], [-1.0 1.1304347826086958]\n", + "Scaled range for F-8: [0.0 1.0], [0.0 1.0]\n" + ] + } + ], + "source": [ + "for file in msl_files:\n", + " # load\n", + " train_original = np.load(train_folder + f'{file}.npy')\n", + " test_original = np.load(test_folder + f'{file}.npy')\n", + " print(f'Original range for {file}: [{train_original.min()} {train_original.max()}], [{train_original.min()} {train_original.max()}]')\n", + " # scale\n", + " train_scaled, test_scaled = scale_data(train_original, test_original)\n", + " print(f'Scaled range for {file}: [{train_scaled.min()} {train_scaled.max()}], [{test_scaled.min()} {test_scaled.max()}]')\n", + " # build labels\n", + " labels = np.zeros_like(test_scaled)\n", + " indices = msl[msl['chan_id'] == file]['anomaly_sequences'].values[0]\n", + " indices = indices.replace(']', '').replace('[', '').split(', ')\n", + " indices = [int(i) for i in indices]\n", + " for i in range(0, len(indices), 2):\n", + " labels[indices[i]:indices[i+1], :] = 1\n", + " # save\n", + " dir = '../../TranAD/processed/MSL'\n", + " os.makedirs(dir, exist_ok=True)\n", + " np.save(f'{dir}/{file}_train.npy', train_scaled)\n", + " np.save(f'{dir}/{file}_test.npy', test_scaled)\n", + " np.save(f'{dir}/{file}_labels.npy', 
labels) \n" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [], + "source": [ + "t_1_test = np.load(test_folder + 'T-1.npy')\n", + "t_1_train = np.load(train_folder + 'T-1.npy')" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2875, 25)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t_1_train.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "min_original_test = t_1_test.min()\n", + "max_original_test = t_1_test.max()\n", + "min_original_train = t_1_train.min()\n", + "max_original_train = t_1_train.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-1.0 1.0\n", + "-1.0 1.0\n" + ] + } + ], + "source": [ + "print(f'{min_original_train} {max_original_train}')\n", + "print(f'{min_original_test} {max_original_test}')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.023095539152304296\n", + "0.020554483585080625\n" + ] + } + ], + "source": [ + "print(t_1_train.mean())\n", + "print(t_1_test.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "t_1_train_scaled, t_1_test_scaled = scale_data(t_1_train, t_1_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "min_train_scaled = t_1_train_scaled.min()\n", + "max_train_scaled = t_1_train.max()\n", + "min_test_scaled = t_1_test_scaled.min()\n", + "max_test_scaled = t_1_test.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0 1.0\n", 
+ "0.0 1.0\n" + ] + } + ], + "source": [ + "print(f'{min_train_scaled} {max_train_scaled}')\n", + "print(f'{min_test_scaled} {max_test_scaled}')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0315616826196304\n", + "0.030848537658773482\n" + ] + } + ], + "source": [ + "print(t_1_train_scaled.mean())\n", + "print(t_1_test_scaled.mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 191, 112, 88, 76, 64, 69065, 84, 76, 176,\n", + " 1943]),\n", + " array([-1. , -0.8, -0.6, -0.4, -0.2, 0. , 0.2, 0.4, 0.6, 0.8, 1. ]))" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.histogram(t_1_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import seaborn as sns\n", + "sns.histplot(t_1_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "\n", + "sns.histplot(t_1_train_scaled)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.histplot(t_1_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkQAAAJCCAYAAAAhudhHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAABWXElEQVR4nO3de1yUdd7/8TeiCGOC4ahAOoqHBLXM1Awtw83VyLvN9N7W1nXxUG6FlbK3mZZpmalZapmLW2vg3uVa7crmamnmiTW1FGXLHEwSHU3AJg+I4xGu3x/9nDvyEANzuHRez8djHnld13eu72euFN5zHb7fEMMwDAEAAASxWoEuAAAAINAIRAAAIOgRiAAAQNAjEAEAgKBHIAIAAEGPQAQAAIIegQgAAAQ9AhEAAAh6tQNdAACUl5fr7NmzgS4joOrUqaPQ0NBAlwEELQIRgIAqKyvTgQMHFOyD5oeEhKhp06a65pprAl0KEJRCmLoDQKCUl5dr9+7dslgsatSokUJCQgJdUkAYhqHvvvtOLpdLbdq04UwREACcIQIQMGfPnpVhGGrUqJEiIiICXU5ANWrUSHv37tXZs2cJREAAcFM1gIAL1jNDP8YxAAKLQAQAAIIel8wAmI7D4ZDT6fRbf1arVTabzW/9ATAfAhEAU3E4HEpMTJTL5fJbnxaLRXa7nVAEBDECEQBTcTqdcrlcyho7UYm25j7vz+7Yp6Ezp8jpdHociObNm6eZM2equLhYHTt21Ny5c3XLLbf4qFIAvkQgAmBKibbm6tS6baDLuKR3331X6enpmj9/vrp166Y5c+aob9++2rVrlxo3bhzo8gB4iJuqAaAaZs2apYceekjDhg1Tu3btNH/+fFksFr311luBLg1ANRCIAMBDZ86cUW5urnr37u1eV6tWLfXu3VubNm0KYGUAqotABAAecjqdKi8vV5MmTSqtb9KkiYqLiwNUFYCaIBABAICgRyACAA9ZrVaFhoaqpKSk0vqSkhLFxMQEqCoANUEgAgAPhYWFqXPnzlq9erV7XUVFhVavXq2kpKQAVgagunjsHoAp2R37TN1Penq6UlNT1aVLF91yyy2aM2eOTpw4oWHDhnm5QgD+QCACYCpWq1UWi0VDZ07xW58Wi0VWq9Wj9/zmN7/Rd999p2effVbFxcW66aabtGLFigtutAZwZQgxDMMIdBEAgtOpU6dUWFio+Ph4hYeHu9cH41xmlzoWAPyDM0QATMdmswU8oAAILtxUDQAAgh6BCAAABD0CEQAACHoEIgAAEPQIRAAAIOgRiAAAQNAjEAEAgKDHOEQATCcYB2YEEFgEIgCm4nA4lJiYKJfL5bc+LRaL7HY7oQgIYgQiAKbidDrlcrmUNe4JJTZr6vP+7PsPaOiMV+V0Oj0KRDk5OZo5c6Zyc3NVVFSk7Oxs9e/f33eFAvApAhEAU0ps1lSd2rQKdBmXdOLECXXs2FHDhw/XgAEDAl0OgBoiEAFANaSkpCglJSXQZQDwEp4yAwAAQY9ABAAAgh6BCAAABD0CEQAACHoEIgAAEPR4ygyAKdn3HzB1P2VlZSooKHAvFxYWKi8vT9HR0QzwCFyBCEQATMVqtcpisWjojFf91qfFYpHVavXoPVu3blWvX
r3cy+np6ZKk1NRUZWVlebM8AH5AIAJgKjabTXa73fRzmSUnJ8swDB9VBMDfCEQATMdms3HZCYBfcVM1AAAIegQiAAAQ9AhEAAAg6BGIAABA0CMQAQCAoEcgAgAAQY9ABAAAgh7jEAEwHYfDYfqBGQFcXQhEAEzF4XAoMTFBLtdJv/VpsUTIbs+vciiaNm2alixZovz8fEVERKh79+6aMWOG2rZt6+NKAfgKgQiAqTidTrlcJ/WXCcPUtnmsz/vbta9ID76YKafTWeVAtH79eqWlpalr1646d+6cJkyYoD59+mjnzp2qV6+ejysG4AsEIgCm1LZ5rG663pyXsVasWFFpOSsrS40bN1Zubq569uwZoKoA1AQ3VQNADR07dkySFB0dHeBKAFQXgQgAaqCiokKjR49Wjx491KFDh0CXA6CauGQGADWQlpamHTt2aMOGDYEuBUANEIgAoJpGjRqlZcuWKScnR02bNg10OQBqgEAEAB4yDEOPPfaYsrOztW7dOsXHxwe6JAA1RCACAA+lpaVp0aJF+uCDD1S/fn0VFxdLkqKiohQRERHg6gBUB4EIgCnt2ldk2n4yMjIkScnJyZXWZ2ZmaujQoV6oCoC/EYgAmIrVapXFEqEHX8z0W58WS4SsVmuV2xuG4cNqAAQCgQiAqdhsNtnt+cxlBsCvCEQATMdmsxFQAPgVAzMCAICgRyACAABBj0AEAACCHoEIAAAEPQIRAAAIegQiAAAQ9AhEAAAg6DEOEQDTcTgcDMwIwK8IRABMxeFwKDExQS7XSb/1abFEyG7Pr3IoysjIUEZGhvbu3StJat++vZ599lmlpKT4sEoAvkQgAmAqTqdTLtdJvT5pmNq0iPV5f7v3FmnUc5lyOp1VDkRNmzbV9OnT1aZNGxmGoYULF+ree+/V9u3b1b59ex9XDMAXCEQATKlNi1jd2Nacl7HuueeeSstTp05VRkaGNm/eTCACrlAEIgCogfLycr3//vs6ceKEkpKSAl0OgGoiEAFANXz55ZdKSkrSqVOndM011yg7O1vt2rULdFkAqonH7gGgGtq2bau8vDx99tlneuSRR5SamqqdO3cGuiwA1cQZIgCohrCwMLVu3VqS1LlzZ23ZskWvvvqq/vznPwe4MgDVwRkiAPCCiooKnT59OtBlAKgmzhABgIfGjx+vlJQU2Ww2HT9+XIsWLdK6deu0cuXKQJcGoJoIRABMaffeItP2c+jQIf3+979XUVGRoqKidOONN2rlypX65S9/6YMKAfgDgQiAqVitVlksERr1XKbf+rRYImS1WqvcfsGCBT6sBkAgEIgAmIrNZpPdns9cZgD8ikAEwHRsNhsBBYBf8ZQZAAAIegQiAAAQ9AhEAAAg6BGIAABA0CMQAQCAoEcgAgAAQY9ABAAAgh7jEAEwHYfDwcCMAPyKQATAVBwOhxITE+RynfRbnxZLhOz2/GqHounTp2v8+PF64oknNGfOHO8WB8AvCEQATMXpdMrlOqlZzw9T6xaxPu+vYG+R0p/NlNPprFYg2rJli/785z/rxhtv9EF1APyFQATAlFq3iFWHBHNfxiorK9PgwYP15ptv6oUXXgh0OQBqgJuqAaCa0tLS1K9fP/Xu3TvQpQCoIc4QAUA1LF68WNu2bdOWLVsCXQoALyAQAYCH9u/fryeeeEKrVq1SeHh4oMsB4AUEIgDwUG5urg4dOqSbb77Zva68vFw5OTl6/fXXdfr0aYWGhgawQgCeIhABgIfuvPNOffnll5XWDRs2TAkJCRo3bhxhCLgCEYgAwEP169dXhw4dKq2rV6+eGjZseMF6AFcGAhEAUyrYW3RV9QPA3AhEAEzFarXKYolQ+rOZfuvTYomQ1Wqt0T7WrVvnnWIABASBCICp2Gw22e35zGUGwK8IRABMx2azEVAA+BUjVQMAgKBHIAIAAEGPQAQAAIIegQgAAAQ9AhEAAAh6BCIAABD0CEQAACDoMQ4RANNxOBwMzAjArwhEAEzF4
XAoMTFBLtdJv/VpsUTIbs+vciiaPHmynnvuuUrr2rZtq/z8fF+UB8APCEQATMXpdMrlOqkZU4apVXyMz/v7prBY4yZmyul0enSWqH379vrkk0/cy7Vr8+MUuJLxLxiAKbWKj1G7RPNexqpdu7ZiYnwf2AD4BzdVA0A17N69W3FxcWrZsqUGDx4sh8MR6JIA1ACBCAA81K1bN2VlZWnFihXKyMhQYWGhbr/9dh0/fjzQpQGoJi6ZAYCHUlJS3H++8cYb1a1bNzVv3lzvvfeeRowYEcDKAFQXZ4gAoIYaNGig66+/XgUFBYEuBUA1EYgAoIbKysr0zTffKDY2NtClAKgmAhEAeOh//ud/tH79eu3du1cbN27Ufffdp9DQUD3wwAOBLg1ANXEPEQBT+qaw2LT9HDhwQA888IC+//57NWrUSLfddps2b96sRo0a+aBCAP5AIAJgKlarVRZLhMZNzPRbnxZLhKxWa5XbL1682IfVAAgEAhEAU7HZbLLb85nLDIBfEYgAmI7NZiOgAPArbqoGAABBj0AEAACCHoEIAAAEPQIRAAAIegQiAAAQ9AhEAAAg6BGIAABA0GMcIgCm43A4GJgRgF8RiACYisPhUGJiglyuk37r02KJkN2e71Eo+vbbbzVu3Dh99NFHcrlcat26tTIzM9WlSxcfVgrAVwhEAEzF6XTK5TqpKVOHKb5ljM/7K9xTrIlPZ8rpdFY5EB05ckQ9evRQr1699NFHH6lRo0bavXu3rr32Wh9XC8BXCEQATCm+ZYwSE815GWvGjBlq1qyZMjP/bwLa+Pj4AFYEoKa4qRoAPLR06VJ16dJFv/71r9W4cWN16tRJb775ZqDLAlADBCIA8NCePXuUkZGhNm3aaOXKlXrkkUf0+OOPa+HChYEuDUA1cckMADxUUVGhLl266MUXX5QkderUSTt27ND8+fOVmpoa4OoAVAdniADAQ7GxsWrXrl2ldYmJiXI4HAGqCEBNEYgAwEM9evTQrl27Kq37+uuv1bx58wBVBKCmCEQA4KExY8Zo8+bNevHFF1VQUKBFixbpjTfeUFpaWqBLA1BN3EMEwJQK9xSbtp+uXbsqOztb48eP1/PPP6/4+HjNmTNHgwcP9kGFAPwhxDAMI9BFAAhOp06dUmFhoeLj4xUeHi7pyhmp2tsudiwA+A9niACYis1mk92ez1xmAPyKQATAdGw2GwEFgF9xUzUAAAh6BCIAABD0CEQAACDoEYgAAEDQIxABAICgRyACAABBj0AEAACCHuMQATAdh8PBwIwA/IpABMBUroSpO1q0aKF9+/ZdsP7RRx/VvHnzvF0eAD8gEAEwFafTKZfrpJ6ZNkzNW8b4vL99e4r1wvhMOZ3OKgeiLVu2qLy83L28Y8cO/fKXv9Svf/1rX5UJwMcIRABMqXnLGLVtZ87LWI0aNaq0PH36dLVq1Up33HFHgCoCUFPcVA0ANXDmzBm9/fbbGj58uEJCQgJdDoBqIhABQA3885//1NGjRzV06NBAlwKgBghEAFADCxYsUEpKiuLi4gJdCoAa4B4iAKimffv26ZNPPtGSJUsCXQqAGuIMEQBUU2Zmpho3bqx+/foFuhQANUQgAoBqqKioUGZmplJTU1W7NifbgSsd/4oBmNK+PcWm7ueTTz6Rw+HQ8OHDvVwRgEAgEAEwFavVKoslQi+Mz/RbnxZLhKxWq0fv6dOnjwzD8FFFAPyNQFQFFRUVOnjwoOrXr884I4AXnTlzRhUVFSovL3eP/Hzddddpx46v/D6X2XXXXVdp9Gl/Ky8vV0VFhcrKynTmzJmA1QFcTQzD0PHjxxUXF6datS5/lxCBqAoOHjyoZs2aBboM4KrTvHlzzZ8/XydPXjhvmT+/fHz//ff6/vvv/dbfpTidTvXr1++i86QBqL79+/eradOml21DIKqC+vXrS/rhgEZGRga4GuDqcebMGZWUlKhFixYKDw8PdDkBderUKe3du1dbt25VWFhYoMsBrgqlp
aVq1qyZ+/f45RCIquD8N9XIyEgCEeBFp06d0nfffafQ0FCFhoYGupyACg0NVa1atXTNNdcEfTgEvK0qZ5x57B4AAAQ9AhEAAAh6BCIAABD0CEQAACDoEYgAAEDQ4ykzAKbjcDj8PjCjzWbzW38AzIdABMBUHA6HEhMT5HJdOFijr1gsEbLb86scisrLyzV58mS9/fbbKi4uVlxcnIYOHapnnnmG0eyBKxSBCICpOJ1OuVwnNWbGMDVtFePz/g58U6zZ4zLldDqrHIhmzJihjIwMLVy4UO3bt9fWrVs1bNgwRUVF6fHHH/dxxQB8gUAEwJSatopRq3bmvIy1ceNG3XvvverXr58kqUWLFvrb3/6mzz//PMCVAagubqoGAA91795dq1ev1tdffy1J+s9//qMNGzYoJSUlwJUBqC7OEAGAh5566imVlpYqISFBoaGhKi8v19SpUzV48OBAlwagmghEAOCh9957T++8844WLVqk9u3bKy8vT6NHj1ZcXJxSU1MDXR6AaiAQAYCHxo4dq6eeekqDBg2SJN1www3at2+fpk2bRiACrlAEIhNo06aN9u/f77P9N2vWTLt37/bZ/oFg43K5VKtW5VswQ0NDVVFREaCKANQUgSjA2rRpo4KCAp/2UVBQoDZt2hCKAC+55557NHXqVNlsNrVv317bt2/XrFmzNHz48ECXBqCaCEQB9s033yhEkuHDPkL+fz/AleTAN8Wm7Wfu3LmaOHGiHn30UR06dEhxcXH6wx/+oGeffdYHFQLwBwJRgBnGD1Fo/B1t1bl5A6/vP3ffUU1bv0syfBm5AO+xWq2yWCI0e1ym3/q0WCJktVqr3L5+/fqaM2eO5syZ47uiAPgVgcgkWl5bTx0bR3p9v0dKz3p9n4Av2Ww22e35zGUGwK8IRCYRUkuqW9f742SGMPQmrkA2m42AAsCvCEQmESIpNNT7k0IyzSQAAD+P8wcAACDoEYgAAEDQIxABAICgRyACAABBj0AEAACCHoEIAAAEPR67B2A6DoeDgRkB+BWBCICpOBwOJSQm6KTrpN/6jLBEKN+e71EoOn78uCZOnKjs7GwdOnRInTp10quvvqquXbv6sFIAvkIgAmAqTqdTJ10nNWzmKMW2vM7n/RXt+VaZY1+X0+n0KBA9+OCD2rFjh/73f/9XcXFxevvtt9W7d2/t3LlT113n+7oBeBeBCIApxba8Trb28YEu46JOnjypf/zjH/rggw/Us2dPSdLkyZP1r3/9SxkZGXrhhRcCXCEAT3FTNQB46Ny5cyovL1d4eHil9REREdqwYUOAqgJQEwQiAPBQ/fr1lZSUpClTpujgwYMqLy/X22+/rU2bNqmoqCjQ5QGoBgIRAFTD//7v/8owDF133XWqW7euXnvtNT3wwAOqVYsfq8CViH+5AFANrVq10vr161VWVqb9+/fr888/19mzZ9WyZctAlwagGghEAFAD9erVU2xsrI4cOaKVK1fq3nvvDXRJAKohoIGovLxcEydOVHx8vCIiItSqVStNmTJFhmG42xiGoWeffVaxsbGKiIhQ7969tXv37kr7OXz4sAYPHqzIyEg1aNBAI0aMUFlZWaU2X3zxhW6//XaFh4erWbNmeumll/zyGQFcnVauXKkVK1aosLBQq1atUq9evZSQkKBhw4YFujQA1RDQx+5nzJihjIwMLVy4UO3bt9fWrVs1bNgwRUVF6fHHH5ckvfTSS3rttde0cOFCxcfHa+LEierbt6927tzpfsJj8ODBKioq0qpVq3T27FkNGzZMI0eO1KJFiyRJpaWl6tOnj3r37q358+fryy+/1PDhw9WgQQONHDkyYJ8fwKUV7fnW1P0cO3ZM48eP14EDBxQdHa2BAwdq6tSpqlOnjpcrBOAPAQ1EGzdu1L333qt+/fpJklq0aKG//e1v+vzzzyX9cHZozpw5euaZZ9ynof/617+qSZMm+uc//6lBgwbJbrdrxYoV2rJli7p06SJJmjt3ru6++
269/PLLiouL0zvvvKMzZ87orbfeUlhYmNq3b6+8vDzNmjWLQASYjNVqVYQlQpljX/dbnxGWCFmtVo/ec//99+v+++/3UUUA/C2ggah79+5644039PXXX+v666/Xf/7zH23YsEGzZs2SJBUWFqq4uFi9e/d2vycqKkrdunXTpk2bNGjQIG3atEkNGjRwhyFJ6t27t2rVqqXPPvtM9913nzZt2qSePXsqLCzM3aZv376aMWOGjhw5omuvvbZSXadPn9bp06fdy6Wlpb46BAB+wmazKd+ez1xmAPwqoIHoqaeeUmlpqRISEhQaGqry8nJNnTpVgwcPliQVFxdLkpo0aVLpfU2aNHFvKy4uVuPGjSttr127tqKjoyu1iY+Pv2Af57f9NBBNmzZNzz33nJc+JQBP2Ww2AgoAvwroTdXvvfee3nnnHS1atEjbtm3TwoUL9fLLL2vhwoWBLEvjx4/XsWPH3K/9+/cHtB4AAOBbAT1DNHbsWD311FMaNGiQJOmGG27Qvn37NG3aNKWmpiomJkaSVFJSotjYWPf7SkpKdNNNN0mSYmJidOjQoUr7PXfunA4fPux+f0xMjEpKSiq1Ob98vs2P1a1bV3Xr1vXOhwQAAKYX0DNELpfrglFdQ0NDVVFRIUmKj49XTEyMVq9e7d5eWlqqzz77TElJSZKkpKQkHT16VLm5ue42a9asUUVFhbp16+Zuk5OTo7Nnz7rbrFq1Sm3btr3gchkAAAg+AQ1E99xzj6ZOnarly5dr7969ys7O1qxZs3TfffdJkkJCQjR69Gi98MILWrp0qb788kv9/ve/V1xcnPr37y9JSkxM1F133aWHHnpIn3/+uT799FONGjVKgwYNUlxcnCTpt7/9rcLCwjRixAh99dVXevfdd/Xqq68qPT09UB8dAACYSEAvmc2dO1cTJ07Uo48+qkOHDikuLk5/+MMf9Oyzz7rbPPnkkzpx4oRGjhypo0eP6rbbbtOKFSsqzTL9zjvvaNSoUbrzzjtVq1YtDRw4UK+99pp7e1RUlD7++GOlpaWpc+fOslqtevbZZ3nkHgAASJJCjB8PC42LKi0tVVRUlI4dO6bIyEiv7jskJESStGDAzbqrQ+Ofae25FTsOacSSbZIk/lfDbE6dOqXCwkLFx8dX+pITjDgWgPd58vubucwAAEDQC+glMwC4GIfDwcCMAPyKQATAVBwOhxISE3XS5fJbnxEWi/Ltdo9CUU5OjmbOnKnc3FwVFRUpOzvb/bCH9MMl6kmTJunNN9/U0aNH1aNHD2VkZKhNmzY++AQAaopABMBUnE6nTrpcGvbSeMW29P1Zm6I9DmU+OU1Op9OjQHTixAl17NhRw4cP14ABAy7YXpWJqQGYB4EIgCnFtrTJ1t68Z1NSUlKUkpJy0W1VmZgagLlwUzUAeNnPTUwNwHwIRADgZVWZmBqAuRCIAABA0CMQAYCX/Xhi6h8rKSm56ITSAAKPQAQAXlaViakBmAtPmQFANZSVlamgoMC9XFhYqLy8PEVHR8tms7knpm7Tpo37sfsfT0wNwFwIRABMqWiPw9T9bN26Vb169XIvp6enS5JSU1OVlZVVpYmpAZgHgQiAqVitVkVYLMp8cprf+oywWGS1Wj16T3Jy8mUnTA4JCdHzzz+v559/vqblAfADAhEAU7HZbMq325nLDIBfEYgAmI7NZiOgAPArnjIDAABBj0AEAACCHoEIAAAEPQIRAAAIegQiAAAQ9AhEAAAg6BGIAABA0GMcIgCm43A4GJgRgF8RiACYisPhUEJiok66XH7rM8JiUb7d7lEoysnJ0cyZM5Wbm6uioiJlZ2dXmrh1yZIlmj9/vnJzc3X48GFt375dN910k/eLB+AVBCIApuJ0OnXS5dLwGZMU26qFz/sr+mav3hr3nJxOp0eB6MSJE+rYsaOGDx+uAQMGXHT7bbfdpvvvv18PPfSQN0sG4AMEIgCmFNuqhWzt2ga6jEtKSUlRSkrKJbcPGTJEkrR37
14/VQSgJripGgAABD0CEQAACHoEIgAAEPQIRAAAIOgRiAAAQNDjKTMAplT0zV5T91NWVqaCggL3cmFhofLy8hQdHS2bzabDhw/L4XDo4MGDkqRdu3ZJkmJiYhQTE1PjugF4F4EIgKlYrVZFWCx6a9xzfuszwmKR1Wr16D1bt25Vr1693Mvp6emSpNTUVGVlZWnp0qUaNmyYe/ugQYMkSZMmTdLkyZNrXjQAryIQATAVm82mfLvd9FN3JCcnyzCMS24fOnSohg4dWsPKAPgLgQiA6dhsNuYWA+BX3FQNAACCHoEIAAAEPQIRAAAIegQiAAAQ9AhEAAAg6BGIAABA0CMQAQCAoEcgAgAAQY+BGQGYjsPhMP1I1QCuLgQiAKbicDiUkJioky6X3/qMsFiUb7d7FIpycnI0c+ZM5ebmqqioSNnZ2erfv78k6ezZs3rmmWf04Ycfas+ePYqKilLv3r01ffp0xcXF+ehTAKgJAhEAU3E6nTrpcmn4jBcU2zLe5/0V7SnUW+OekdPp9CgQnThxQh07dtTw4cM1YMCASttcLpe2bdumiRMnqmPHjjpy5IieeOIJ/epXv9LWrVu9/REAeAGBCIApxbaMl61dYqDLuKSUlBSlpKRcdFtUVJRWrVpVad3rr7+uW265RQ6Hg8tzgAlxUzUA+MGxY8cUEhKiBg0aBLoUABdBIAIAHzt16pTGjRunBx54QJGRkYEuB8BFEIgAwIfOnj2r+++/X4ZhKCMjI9DlALgE7iECAB85H4b27dunNWvWcHYIMDECEQD4wPkwtHv3bq1du1YNGzYMdEkALoNABMCUivYUmrqfsrIyFRQUuJcLCwuVl5en6OhoxcbG6r//+7+1bds2LVu2TOXl5SouLpYkRUdHKywszCu1A/AeAhEAU7FarYqwWPTWuGf81meExSKr1erRe7Zu3apevXq5l9PT0yVJqampmjx5spYuXSpJuummmyq9b+3atUpOTq5RvQC8j0AEwFRsNpvy7XbTT92RnJwswzAuuf1y2wCYD4EIgOnYbDYGLwTgVzx2DwAAgh6BCAAABD0CEQAACHoEIgAAEPQIRAAAIOgRiAAAQNAjEAEAgKBHIAIAAEGPgRkBmI7D4TD9SNUAri4EIgCm4nA4lJCYqJMul9/6jLBYlG+3exSKcnJyNHPmTOXm5qqoqEjZ2dnq37+/e/vkyZO1ePFi7d+/X2FhYercubOmTp2qbt26+eATAKgpAhEAU3E6nTrpcmnEjOmKadnS5/0V79mjBeOektPp9CgQnThxQh07dtTw4cM1YMCAC7Zff/31ev3119WyZUudPHlSs2fPVp8+fVRQUKBGjRp58yMA8AICEQBTimnZUs3btQt0GZeUkpKilJSUS27/7W9/W2l51qxZWrBggb744gvdeeedvi4PgIe4qRoAfOzMmTN64403FBUVpY4dOwa6HAAXwRkiAPCRZcuWadCgQXK5XIqNjdWqVatktVoDXRaAi+AMEQD4SK9evZSXl6eNGzfqrrvu0v33369Dhw4FuiwAFxHwQPTtt9/qd7/7nRo2bKiIiAjdcMMN2rp1q3u7YRh69tlnFRsbq4iICPXu3Vu7d++utI/Dhw9r8ODBioyMVIMGDTRixAiVlZVVavPFF1/o9ttvV3h4uJo1a6aXXnrJL58PQPCqV6+eWrdurVtvvVULFixQ7dq1tWDBgkCXBeAiAhqIjhw5oh49eqhOnTr66KOPtHPnTr3yyiu69tpr3W1eeuklvfbaa5o/f74+++wz1atXT3379tWpU6fcbQYPHqyvvvpKq1at0rJly5STk6ORI0e6t5eWlqpPnz5q3ry5cnNzNXPmTE2ePFlvvPGGXz8vgOBWUVGh06dPB7oMABcR0HuIZsyYoWbNmikzM9O9Lj4+3v1nwzA0Z84cPfPMM7r33nslSX/961/VpEkT/fOf/9SgQYNkt9u1YsUKbdmyRV26dJEkzZ07V3fffbdefvllxcXF6Z133
tGZM2f01ltvKSwsTO3bt1deXp5mzZpVKTgBMI/iPXtM3U9ZWZkKCgrcy4WFhcrLy1N0dLQaNmyoqVOn6le/+pViY2PldDo1b948ffvtt/r1r3/trdIBeFFAA9HSpUvVt29f/frXv9b69et13XXX6dFHH9VDDz0k6YcfMMXFxerdu7f7PVFRUerWrZs2bdqkQYMGadOmTWrQoIE7DElS7969VatWLX322We67777tGnTJvXs2VNhYWHuNn379tWMGTN05MiRSmekJOn06dOVvsWVlpb66hAA+Amr1aoIi0ULxj3ltz4jLBaPb3beunWrevXq5V5OT0+XJKWmpmr+/PnKz8/XwoUL5XQ61bBhQ3Xt2lX//ve/1b59e6/WDsA7AhqI9uzZo4yMDKWnp2vChAnasmWLHn/8cYWFhSk1NVXFxcWSpCZNmlR6X5MmTdzbiouL1bhx40rba9eurejo6Eptfnzm6cf7LC4uviAQTZs2Tc8995z3PiiAKrPZbMq3200/dUdycrIMw7jk9iVLltS0LAB+FNBAVFFRoS5duujFF1+UJHXq1Ek7duzQ/PnzlZqaGrC6xo8f7/62J/1whqhZs2YBqwcINjabjbnFAPhVQG+qjo2NVbufjESbmJgoh8MhSYqJiZEklZSUVGpTUlLi3hYTE3PBY6znzp3T4cOHK7W52D5+3MeP1a1bV5GRkZVeAADg6hXQQNSjRw/t2rWr0rqvv/5azZs3l/TDDdYxMTFavXq1e3tpaak+++wzJSUlSZKSkpJ09OhR5ebmutusWbNGFRUV7kkUk5KSlJOTo7Nnz7rbrFq1Sm3btr3gchkAAAg+AQ1EY8aM0ebNm/Xiiy+qoKBAixYt0htvvKG0tDRJUkhIiEaPHq0XXnhBS5cu1Zdffqnf//73iouLc88qnZiYqLvuuksPPfSQPv/8c3366acaNWqUBg0apLi4OEk/zCkUFhamESNG6KuvvtK7776rV199tdJlMQAAELwCeg9R165dlZ2drfHjx+v5559XfHy85syZo8GDB7vbPPnkkzpx4oRGjhypo0eP6rbbbtOKFSsUHh7ubvPOO+9o1KhRuvPOO1WrVi0NHDhQr732mnt7VFSUPv74Y6Wlpalz586yWq169tlneeQeAABIkkKMyz0mAUk/XKaLiorSsWPHvH4/UUhIiCRpwYCbdVeHxj/T2nMrdhzSiCXbJOmyT8QAgXDq1CkVFhYqPj6+0pecYMSxALzPk9/fAZ+6AwAAINAIRAAAIOgRiAAAQNAL6E3VAHAxDofD9CNVA7i6EIgAmIrD4VBCYqJOulx+6zPCYlG+3e5RKMrJydHMmTOVm5uroqIiZWdnu4cD+amHH35Yf/7znzV79myNHj3aO0UD8CoCEQBTcTqdOulyacT0lxXTspXP+yve840WPPU/cjqdHgWiEydOqGPHjho+fLgGDBhwyXbZ2dnavHmze1w0AOZEIAJgSjEtW6l5O/PODJ+SkqKUlJTLtvn222/12GOPaeXKlerXr5+fKgNQHdxUDQA+UFFRoSFDhmjs2LFq3968wQ7ADwhEAOADM2bMUO3atfX4448HuhQAVcAlMwDwstzcXL366qvatm2bezR6AObGGSIA8LJ///vfOnTokGw2m2rXrq3atWtr3759+uMf/6gWLVoEujwAF8EZIgDwsiFDhqh3796V1vXt21dDhgzRsGHDAlQVgMshEAEwpeI935i6n7KyMhUUFLiXCwsLlZeXp+joaNlsNjVs2LBS+zp16igmJkZt27atUb0AfINABMBUrFarIiwWLXjqf/zWZ4TFIqvV6tF7tm7dql69ermX09PTJUmpqanKysryZnkA/IBABMBUbDab8u1200/dkZycLMMwqtx+7969HlYFwJ8IRABMx2azMbcYAL/iKTMAABD0CEQAACDoEYgAAEDQIxABAICgRyACAABBj0AEAACCHoEIAAAEPQIRAAAIegzMCMB0HA6H6UeqBnB1I
RABMBWHw6GExESddLn81meExaJ8u92jUJSTk6OZM2cqNzdXRUVFys7OVv/+/d3bhw4dqoULF1Z6T9++fbVixQpvlQ3AiwhEAEzF6XTqpMulEdPmKLZla5/3V7SnQAvGj5bT6fQoEJ04cUIdO3bU8OHDNWDAgIu2ueuuu5SZmelerlu3bo3rBeAbBCIAphTbsrWat7sh0GVcUkpKilJSUi7bpm7duoqJifFTRQBqgpuqAcBH1q1bp8aNG6tt27Z65JFH9P333we6JACXwBkiAPCBu+66SwMGDFB8fLy++eYbTZgwQSkpKdq0aZNCQ0MDXR6AnyAQAYAPDBo0yP3nG264QTfeeKNatWqldevW6c477wxgZQAuhktmAOAHLVu2lNVqVUFBQaBLAXARBCIA8IMDBw7o+++/V2xsbKBLAXARXDIDYEpFe/xzJqW6/ZSVlVU621NYWKi8vDxFR0crOjpazz33nAYOHKiYmBh98803evLJJ9W6dWv17dvXW6UD8CICEQBTsVqtirBYtGD8aL/1GWGxyGq1evSerVu3qlevXu7l9PR0SVJqaqoyMjL0xRdfaOHChTp69Kji4uLUp08fTZkyhbGIAJMiEAEwFZvNpny73fRTdyQnJ8swjEtuX7lyZU3LAuBH1QpELVu21JYtW9SwYcNK648ePaqbb75Ze/bs8UpxAIKTzWZjbjEAflWtm6r37t2r8vLyC9afPn1a3377bY2LAgAA8CePzhAtXbrU/eeVK1cqKirKvVxeXq7Vq1erRYsWXisOAADAHzwKROdncg4JCVFqamqlbXXq1FGLFi30yiuveK04AAAAf/AoEFVUVEiS4uPjtWXLFo+fygAAADCjat1UXVhY6O06AAAAAqbaj92vXr1aq1ev1qFDh9xnjs576623alwYAACAv1QrED333HN6/vnn1aVLF8XGxiokJMTbdQEAAPhNtQLR/PnzlZWVpSFDhni7HgCQw+Ew/cCMAK4u1QpEZ86cUffu3b1dCwDI4XAoITFRJ10uv/UZYbEo324nFAFBrFqB6MEHH9SiRYs0ceJEb9cDIMg5nU6ddLn04LQMxca38Xl/RYW79Zfxj8jpdHoUiHJycjRz5kzl5uaqqKhI2dnZ7qFJzrPb7Ro3bpzWr1+vc+fOqV27dvrHP/5B8AJMqFqB6NSpU3rjjTf0ySef6MYbb1SdOnUqbZ81a5ZXigMQvGLj26h5u46BLuOSTpw4oY4dO2r48OEaMGDABdu/+eYb3XbbbRoxYoSee+45RUZG6quvvlJ4eHgAqgXwc6oViL744gvddNNNkqQdO3ZU2sYN1gCCQUpKilJSUi65/emnn9bdd9+tl156yb2uVatW/igNQDVUKxCtXbvW23UAwFWjoqJCy5cv15NPPqm+fftq+/btio+P1/jx4y+4rAbAHKo1uSsA4NIOHTqksrIyTZ8+XXfddZc+/vhj3XfffRowYIDWr18f6PIAXES1zhD16tXrspfG1qxZU+2CAOBKd36w2nvvvVdjxoyRJN10003auHGj5s+frzvuuCOQ5QG4iGoFovP3D5139uxZ5eXlaceOHRdM+goAwcZqtap27dpq165dpfWJiYnasGFDgKoCcDnVCkSzZ8++6PrJkyerrKysRgUBwJUuLCxMXbt21a5duyqt//rrr9W8efMAVQXgcqo9l9nF/O53v9Mtt9yil19+2Zu7BRCEigp3m7qfsrIyFRQUuJcLCwuVl5en6Oho2Ww2jR07Vr/5zW/Us2dP9erVSytWrNC//vUvrVu3zkuVA/AmrwaiTZs2McYGgBqxWq2KsFj0l/GP+K3PCItFVqvVo/ds3bpVvXr1ci+np6dLklJTU5WVlaX77rtP8+fP17Rp0/T444+rbdu2+sc//qHbbrvNq7UD8I5qBaKfDkJmGIaKioq0detWRq8GUCM2m035drvp5zJLTk6WYRiXbTN8+HANHz68JqUB8JNqBaKoqKhKy7Vq1VLbtm31/
PPPq0+fPl4pDEDwstlsTG8BwK+qFYgyMzO9XQcAAEDA1OgeotzcXNntdklS+/bt1alTJ68UBQAA4E/VCkSHDh3SoEGDtG7dOjVo0ECSdPToUfXq1UuLFy9Wo0aNvFkjAACAT1Vr6o7HHntMx48f11dffaXDhw/r8OHD2rFjh0pLS/X44497u0YAAACfqtYZohUrVuiTTz5RYmKie127du00b948bqoGAABXnGqdIaqoqFCdOnUuWF+nTh33HD4AAABXimoFol/84hd64okndPDgQfe6b7/9VmPGjNGdd97pteIAAAD8oVqXzF5//XX96le/UosWLdSsWTNJ0v79+9WhQwe9/fbbXi0QQPBxOBymH5gRwNWlWoGoWbNm2rZtmz755BPl5+dL+mEW5969e3u1OADBx+FwKCExUSddLr/1GWGxKN9uJxQBQcyjQLRmzRqNGjVKmzdvVmRkpH75y1/ql7/8pSTp2LFjat++vebPn6/bb7/dJ8UCuPo5nU6ddLn0yNQsxcUn+Ly/g4X5ynh6qJxOp0eBKCcnRzNnzlRubq6KioqUnZ2t/v37u7eHhIRc9H0vvfSSxo4dW9OyAXiZR4Fozpw5euihhxQZGXnBtqioKP3hD3/QrFmzCEQAaiwuPkHxieYd7PXEiRPq2LGjhg8ffsH8jpJUVFRUafmjjz7SiBEjNHDgQH+VCMADHgWi//znP5oxY8Ylt/fp00cvv/xyjYsCALNLSUlRSkrKJbfHxMRUWv7ggw/Uq1cvtWzZ0telAagGjwJRSUnJRR+3d++sdm199913NS4KAK4mJSUlWr58uRYuXBjoUgBcgkeP3V933XXasWPHJbd/8cUXio2NrVYh06dPV0hIiEaPHu1ed+rUKaWlpalhw4a65pprNHDgQJWUlFR6n8PhUL9+/WSxWNS4cWONHTtW586dq9Rm3bp1uvnmm1W3bl21bt1aWVlZ1aoRAKpj4cKFql+//kUvrQEwB48C0d13362JEyfq1KlTF2w7efKkJk2apP/6r//yuIgtW7boz3/+s2688cZK68eMGaN//etfev/997V+/XodPHiw0g+U8vJy9evXT2fOnNHGjRu1cOFCZWVl6dlnn3W3KSwsVL9+/dSrVy/l5eVp9OjRevDBB7Vy5UqP6wSA6njrrbc0ePBghYeHB7oUAJfg0SWzZ555RkuWLNH111+vUaNGqW3btpKk/Px8zZs3T+Xl5Xr66ac9KqCsrEyDBw/Wm2++qRdeeMG9/tixY1qwYIEWLVqkX/ziF5KkzMxMJSYmavPmzbr11lv18ccfa+fOnfrkk0/UpEkT3XTTTZoyZYrGjRunyZMnKywsTPPnz1d8fLxeeeUVST8MD7BhwwbNnj1bffv29ahWAPDUv//9b+3atUvvvvtuoEsBcBkenSFq0qSJNm7cqA4dOmj8+PG67777dN9992nChAnq0KGDNmzYoCZNmnhUQFpamvr163fBGEa5ubk6e/ZspfUJCQmy2WzatGmTJGnTpk264YYbKvXZt29flZaW6quvvnK3+em++/bt697HxZw+fVqlpaWVXgBQHQsWLFDnzp3VsWPHQJcC4DI8HpixefPm+vDDD3XkyBEVFBTIMAy1adNG1157rcedL168WNu2bdOWLVsu2FZcXKywsDA1aNCg0vomTZqouLjY3eanAez88s+1KS0t1cmTJxUREXFB39OmTdNzzz3n8ecB4D0HC/NN3U9ZWZkKCgrcy4WFhcrLy1N0dLR7PKPS0lK9//777jPUAMyrWiNVS9K1116rrl27Vrvj/fv364knntCqVatMd119/PjxSk9Pdy+Xlpa6pygB4FtWq1URFosynh7qtz4jLBZZrVaP3rN161b16tXLvXz+Z0Zqaqr7wY3FixfLMAw98MADXqsVgG9UOxDVVG5urg4dOqSbb77Zva68vFw5OTl6/fXXtXLlSp05c0ZHjx6tdJaopKTEPb5HTEyMPv/880r7Pf8U2o/b/
PTJtJKSEkVGRl707JAk1a1bV3Xr1q3xZwTgOZvNpny73fRzmSUnJ8swjMu2GTlypEaOHFmT0gD4ScAC0Z133qkvv/yy0rphw4YpISFB48aNU7NmzVSnTh2tXr3aPbLrrl275HA4lJSUJElKSkrS1KlTdejQITVu3FiStGrVKkVGRqpdu3buNh9++GGlflatWuXeBwDzsdlszCsGwK8CFojq16+vDh06VFpXr149NWzY0L1+xIgRSk9PV3R0tCIjI/XYY48pKSlJt956q6QfRsZu166dhgwZopdeeknFxcV65plnlJaW5j7D8/DDD+v111/Xk08+qeHDh2vNmjV67733tHz5cv9+YAAAYFoBC0RVMXv2bNWqVUsDBw7U6dOn1bdvX/3pT39ybw8NDdWyZcv0yCOPKCkpSfXq1VNqaqqef/55d5v4+HgtX75cY8aM0auvvqqmTZvqL3/5C4/cAwAAN1MFonXr1lVaDg8P17x58zRv3rxLvuf8U2+Xk5ycrO3bt3ujRAAAcBXyaBwiAACAqxGBCAAABD0CEQAACHoEIgAAEPRMdVM1AEiSw+Ew/cCMAK4uBCIApuJwOJSYmCiXy+W3Pi0Wi+x2O6EICGIEIgCm4nQ65XK59MxzWWreIsHn/e3bm68XJg2V0+n0KBDl5ORo5syZys3NVVFRkbKzs9W/f3/39rKyMj311FP65z//qe+//17x8fF6/PHH9fDDD/vgUwCoKQIRAFNq3iJBbRM6BbqMSzpx4oQ6duyo4cOHa8CAARdsT09P15o1a/T222+rRYsW+vjjj/Xoo48qLi5Ov/rVrwJQMYDLIRABQDWkpKQoJSXlkts3btyo1NRUJScnS/photc///nP+vzzzwlEgAnxlBkA+ED37t21dOlSffvttzIMQ2vXrtXXX3+tPn36BLo0ABfBGSIA8IG5c+dq5MiRatq0qWrXrq1atWrpzTffVM+ePQNdGoCLIBABgA/MnTtXmzdv1tKlS9W8eXPl5OQoLS1NcXFx6t27d6DLA/ATBCIA8LKTJ09qwoQJys7OVr9+/SRJN954o/Ly8vTyyy8TiAAT4h4iAPCys2fP6uzZs6pVq/KP2NDQUFVUVASoKgCXwxkiAKa0b2++qfspKytTQUGBe7mwsFB5eXmKjo6WzWbTHXfcobFjxyoiIkLNmzfX+vXr9de//lWzZs3yVukAvIhABMBUrFarLBaLXpg01G99WiwWWa1Wj96zdetW9erVy72cnp4uSUpNTVVWVpYWL16s8ePHa/DgwTp8+LCaN2+uqVOnMjAjYFIEIgCmYrPZZLfbTT+XWXJysgzDuOT2mJgYZWZm1rQ0AH5CIAJgOjabjXnFAPgVN1UDAICgRyACAABBj0AEAACCHoEIAAAEPQIRAAAIegQiAAAQ9AhEAAAg6DEOEQDTcTgcph+YEcDVhUAEwFQcDocSExPlcrn81qfFYpHdbicUAUGMQATAVJxOp1wul2ZMzFLL5gk+72/PvnyNmzJUTqfTo0CUk5OjmTNnKjc3V0VFRcrOzlb//v3d20tKSjRu3Dh9/PHHOnr0qHr27Km5c+eqTZs2PvgUAGqKQATAlFo2T1C7tp0CXcYlnThxQh07dtTw4cM1YMCAStsMw1D//v1Vp04dffDBB4qMjNSsWbPUu3dv7dy5U/Xq1QtQ1QAuhUAEANWQkpKilJSUi27bvXu3Nm/erB07dqh9+/aSpIyMDMXExOhvf/ubHnzwQX+WCqAKeMoMALzs9OnTkqTw8HD3ulq1aqlu3brasGFDoMoCcBkEIgDwsoSEBNlsNo0fP15HjhzRmTNnNGPGDB04cEBFRUWBLg/ARRCIAMDL6tSpoyVLlujrr79WdHS0LBaL1q5dq5SUFNWqxY9dwIy4hwgAfKBz587Ky8vTsWPHdObMGTVq1EjdunVTly5dAl0agIvgqwoA+FBUVJQaNWqk3bt3a+vWrbr33nsDXRKAi+AMEQBT2rMv39T9lJWVq
aCgwL1cWFiovLw8RUdHy2az6f3331ejRo1ks9n05Zdf6oknnlD//v3Vp08fb5UOVOLrEd6v9hHdCUQATMVqtcpisWjclKF+69NischqtXr0nq1bt6pXr17u5fT0dElSamqqsrKyVFRUpPT0dJWUlCg2Nla///3vNXHiRK/WDZznjxHer/YR3QlEAEzFZrPJbrebfi6z5ORkGYZxye2PP/64Hn/88ZqWBlTJ+RHes8ZOVKKtudf3b3fs09CZUzwe0f1KQiACYDo2m+2q/aEL+FKirbk6tW4b6DKuSNxUDQAAgh6BCAAABD0CEQAACHoEIgAAEPQIRAAAIOgRiAAAQNAjEAEAgKDHOEQATMfXUxD81NU+JQGAn0cgAmAq/piC4Kc8nZJg2rRpWrJkifLz8xUREaHu3btrxowZatv2/wbEO3XqlP74xz9q8eLFOn36tPr27as//elPatKkia8+BoAaIBABMJXzUxDMHZ+lNrYEn/e325Gvx6YN9WhKgvXr1ystLU1du3bVuXPnNGHCBPXp00c7d+5UvXr1JEljxozR8uXL9f777ysqKkqjRo3SgAED9Omnn/ry4wCoJgIRAFNqY0vQDW06BbqMi1qxYkWl5aysLDVu3Fi5ubnq2bOnjh07pgULFmjRokX6xS9+IUnKzMxUYmKiNm/erFtvvTUQZQO4DG6qBoAaOnbsmCQpOjpakpSbm6uzZ8+qd+/e7jYJCQmy2WzatGlTQGoEcHkEIgCogYqKCo0ePVo9evRQhw4dJEnFxcUKCwtTgwYNKrVt0qSJiouLA1AlgJ/DJTMAqIG0tDTt2LFDGzZsCHQpAGqAM0QAUE2jRo3SsmXLtHbtWjVt2tS9PiYmRmfOnNHRo0crtS8pKVFMTIyfqwRQFQQiAPCQYRgaNWqUsrOztWbNGsXHx1fa3rlzZ9WpU0erV692r9u1a5ccDoeSkpL8XS6AKuCSGQB4KC0tTYsWLdIHH3yg+vXru+8LioqKUkREhKKiojRixAilp6crOjpakZGReuyxx5SUlMQTZoBJEYgAmNJuR75p+8nIyJAkJScnV1qfmZmpoUOHSpJmz56tWrVqaeDAgZUGZgRgTgQiAKZitVplsVj02LShfuvTYrHIarVWub1hGD/bJjw8XPPmzdO8efNqUhoAPyEQATAVm80mu93OXGYA/IpABMB0bDYbAQWAX/GUGQAACHoEIgAAEPQIRAAAIOgRiAAAQNAjEAEAgKBHIAIAAEGPQAQAAIIe4xABMB2Hw8HAjAD8ikAEwFQcDocSExPlcrn81qfFYpHdbq9yKJo2bZqWLFmi/Px8RUREqHv37poxY4batm3rbvPGG29o0aJF2rZtm44fP64jR46oQYMGPvoEAGqKQATAVJxOp1wul94anaW2TRN83t+uA/kaPmeonE5nlQPR+vXrlZaWpq5du+rcuXOaMGGC+vTpo507d6pevXqSJJfLpbvuukt33XWXxo8f78uPAMALAhqIqvIt69SpU/rjH/+oxYsXV5oxukmTJu42DodDjzzyiNauXatrrrlGqampmjZtmmrX/r+Pt27dOqWnp+urr75Ss2bN9Mwzz7hnpQZgPm2bJqhTq06BLuOiVqxYUWk5KytLjRs3Vm5urnr27ClJGj16tKQffvYA5/nqcrDdbvf6PoNNQANRVb5ljRkzRsuXL9f777+vqKgojRo1SgMGDNCnn34qSSovL1e/fv0UExOjjRs3qqioSL///e9Vp04dvfjii5KkwsJC9evXTw8//LDeeecdrV69Wg8++KBiY2PVt2/fgH1+AFeHY8eOSZKio6MDXAnMzB+Xg8vKyny276tdQAPRz33LOnbsmBYsWKBFixbpF7/4hSQpMzNTiYmJ2rx5s2699VZ9/PHH2rlzpz755BM1adJEN910k6ZMmaJx48Zp8uTJCgsL0/z58xUfH69XXnlFkpSYmKgNGzZo9uzZBCIANVJRUaHRo0erR48e6tChQ6DLgYmdv
xycNXaiEm3Nvbrvj7Zs1uS//kWnTp3y6n6DianuIfrpt6zc3FydPXtWvXv3drdJSEiQzWbTpk2bdOutt2rTpk264YYbKl1C69u3rx555BF99dVX6tSpkzZt2lRpH+fbnD+lDQDVlZaWph07dmjDhg2BLgVXiERbc3Vq3fbnG3ogf/8+r+4vGJkmEF3sW1ZxcbHCwsIueDKjSZMmKi4udrf5cRg6v/38tsu1KS0t1cmTJxUREVFp2+nTp3X69Gn3cmlpac0/IICrzqhRo7Rs2TLl5OSoadOmgS4HQA2YZmDG89+yFi9eHOhSNG3aNEVFRblfzZo1C3RJAEzEMAyNGjVK2dnZWrNmjeLj4wNdEoAaMkUgOv8ta+3atZW+ZcXExOjMmTM6evRopfYlJSWKiYlxtykpKblg+/ltl2sTGRl5wdkhSRo/fryOHTvmfu3fv7/GnxHA1SMtLU1vv/22Fi1apPr166u4uFjFxcU6efKku01xcbHy8vJUUFAgSfryyy+Vl5enw4cPB6psAJcR0EtmhmHoscceU3Z2ttatW3fBt6zOnTurTp06Wr16tQYOHChJ2rVrlxwOh5KSkiRJSUlJmjp1qg4dOqTGjRtLklatWqXIyEi1a9fO3ebDDz+stO9Vq1a59/FTdevWVd26db36WQF4ZteBfNP2k5GRIUlKTk6utD4zM9M9nMf8+fP13HPPubedfxz/x20AmEdAA1FaWpoWLVqkDz74wP0tS5KioqIUERGhqKgojRgxQunp6YqOjlZkZKQee+wxJSUl6dZbb5Uk9enTR+3atdOQIUP00ksvqbi4WM8884zS0tLcoebhhx/W66+/rieffFLDhw/XmjVr9N5772n58uUB++wALs5qtcpisWj4nKF+69NischqtVa5vWEYP9tm8uTJmjx5cg2qAuBPAQ1EVfmWNXv2bNWqVUsDBw6sNDDjeaGhoVq2bJkeeeQRJSUlqV69ekpNTdXzzz/vbhMfH6/ly5drzJgxevXVV9W0aVP95S9/4ZF7wIRsNpvsdjtzmQHwq4BfMvs54eHhmjdvnubNm3fJNs2bN7/gkthPJScna/v27R7XCMD/bDYbAQWAX5nipmoAAIBAIhABAICgRyACAABBj0AEAACCHoEIAAAEPQIRAAAIegQiAAAQ9Ewz2z0AnOdwOBiYEYBfEYgAmIrD4VBiYqJcLpff+rRYLLLb7VUORdOmTdOSJUuUn5+viIgIde/eXTNmzFDbtm0lSYcPH9akSZP08ccfy+FwqFGjRurfv7+mTJmiqKgoX34UANVEIAJgKk6nUy6XS1ljMpTQ9Hqf95d/4GsNnf2InE5nlQPR+vXrlZaWpq5du+rcuXOaMGGC+vTpo507d6pevXo6ePCgDh48qJdfflnt2rXTvn379PDDD+vgwYP6+9//7uNPBKA6CEQATCmh6fXq1KpjoMu4qBUrVlRazsrKUuPGjZWbm6uePXuqQ4cO+sc//uHe3qpVK02dOlW/+93vdO7cOdWuzY9ewGy4qRoAaujYsWOSpOjo6Mu2iYyMJAwBJkUgAoAaqKio0OjRo9WjRw916NDhom2cTqemTJmikSNH+rk6AFXFVxUAqIG0tDTt2LFDGzZsuOj20tJS9evXT+3atdPkyZP9WxyAKiMQAUA1jRo1SsuWLVNOTo6aNm16wfbjx4/rrrvuUv369ZWdna06deoEoEoAVcElMwDwkGEYGjVqlLKzs7VmzRrFx8df0Ka0tFR9+vRRWFiYli5dqvDw8ABUCqCqOEMEAB5KS0vTokWL9MEHH6h+/foqLi6WJEVFRSkiIsIdhlwul95++22VlpaqtLRUktSoUSOFhoYGsnwAF0EgAmBK+Qe+Nm0/GRkZkqTk5ORK6zMzMzV06FBt27ZNn332mSSpdevWldoUFhaqRYsW1aoVgO8QiACYitVqlcVi0dDZj/itT4vFIqvVWuX2hmFcdntycvLPtgFgLgQiAKZis9lkt9uZywyAX
xGIAJiOzWYjoADwK54yAwAAQY9ABAAAgh6BCAAABD0CEQAACHoEIgAAEPQIRAAAIOgRiAAAQNBjHCIApuNwOBiYEYBfEYgAmIrD4VBiYqJcLpff+rRYLLLb7VUORdOmTdOSJUuUn5+viIgIde/eXTNmzFDbtm3dbf7whz/ok08+0cGDB3XNNde42yQkJPjqYwCoAQIRAFNxOp1yuVzK+uNsJTZt/fNvqCH7gQINfWWMnE5nlQPR+vXrlZaWpq5du+rcuXOaMGGC+vTpo507d6pevXqSpM6dO2vw4MGy2Ww6fPiwJk+erD59+qiwsJDZ7gETIhABMKXEpq3VqXWHQJdxUStWrKi0nJWVpcaNGys3N1c9e/aUJI0cOdK9vUWLFnrhhRfUsWNH7d27V61atfJrvQB+HjdVA0ANHTt2TJIUHR190e0nTpxQZmam4uPj1axZM3+WBqCKCEQAUAMVFRUaPXq0evTooQ4dKp/R+tOf/qRrrrlG11xzjT766COtWrVKYWFhAaoUwOUQiACgBtLS0rRjxw4tXrz4gm2DBw/W9u3btX79el1//fW6//77derUqQBUCeDncA8RAFTTqFGjtGzZMuXk5Khp06YXbI+KilJUVJTatGmjW2+9Vddee62ys7P1wAMPBKBaAJdDIAIADxmGoccee0zZ2dlat26d4uPjq/QewzB0+vRpP1QIwFMEIgDwUFpamhYtWqQPPvhA9evXV3FxsaQfzghFRERoz549evfdd9WnTx81atRIBw4c0PTp0xUREaG77747wNUDuBgCEQBTsh8oMG0/GRkZkqTk5ORK6zMzMzV06FCFh4fr3//+t+bMmaMjR46oSZMm6tmzpzZu3KjGjRt7o2wAXkYgAmAqVqtVFotFQ18Z47c+LRaLrFZrldsbhnHZ7XFxcfrwww9rWhYAPyIQATAVm80mu93OXGYA/IpABMB0bDYbAQUwIbvd7rN9B/qLCYEIAABcVvHh7xUi6Xe/+53P+vB0kmVvIxABAIDLOnqiTIakVx96TLfe2NHr+7c79mnozCkeTbLsbQQiAABQJa3jrlOn1m0DXYZPMHUHAAAIegQiAAAQ9AhEAAAg6BGIAABA0OOmagCm43A4GJgRgF8RiACYisPhUGJiolwul9/69HT8k2nTpmnJkiXKz89XRESEunfvrhkzZqht2wufvjEMQ3fffbdWrFih7Oxs9e/f38vVA/AGAhEAU3E6nXK5XMr6n2lKbBbv8/7s+ws19OXxHo1/sn79eqWlpalr1646d+6cJkyYoD59+mjnzp2qV69epbZz5sxRSEiIL0oH4EUEIgCmlNgsXp1atwt0GRe1YsWKSstZWVlq3LixcnNz1bNnT/f6vLw8vfLKK9q6datiY2P9XSYAD3BTNQDU0LFjxyRJ0dHR7nUul0u//e1vNW/ePMXExASqNABVRCACgBqoqKjQ6NGj1aNHD3Xo0MG9fsyYMerevbvuvffeAFYHoKq4ZAYANZCWlqYdO3Zow4YN7nVLly7VmjVrtH379gBWBsATnCECgGoaNWqUli1bprVr16pp06bu9WvWrNE333yjBg0aqHbt2qpd+4fvngMHDlRycnKAqgVwOZwhAgAPGYahxx57TNnZ2Vq3bp3i4ys/DffUU0/pwQcfrLTuhhtu0OzZs3XPPff4s1QAVUQgAgAPpaWladGiRfrggw9Uv359FRcXS5KioqIUERGhmJiYi95IbbPZLghPAMyBQATAlOz7C03bT0ZGhiRdcPkrMzNTQ4cO9UJVAPyNQATAVKxWqywWi4a+PN5vfVosFlmt1iq3NwzD4z6q8x74ny+njbHb7T7ZL7yDQATAVGw2m+x2O3OZwe/8NW1MWVmZT/eP6iEQmcS+Iy59WXTMJ/s9b9u2bV7fP79I4As2m42/V/A797QxYycq0dbc6/v/aMtmTf7rX3Tq1Cmv7xs1RyAygRBJz6/Nl9b6bv+GpM6dO3t9355OigkAZpdoa65Or
S+cqLem8vfv8/o+4T0EIhMwJD3fs4duj/f+8P7vflGg+dv/o3tbt9eEJ8Z4dd92xz4NnTnFo0kxAQAwIwKRSbSIilLHmEZe3++/9/zwOHDDiHo++cYDAMDVgECEGvPlkxPcoxQceAKLYwAEGoEI1VZ8+HuFSPrd737nsz7Cw8P197//XbGxsT7ZP4ErsOrUqaOQkBB99913atSokUJCQgJdUkAYhqHvvvtOISEhqlOnTqDLAYISgQjVdvREmQxJrz70mG69saPX979hxxf6nz+/pv/6r//y+r7P83XgOn36tOrWreuTfV8NYS40NFRNmzbVgQMHtHfv3kCXE1AhISFq2rSpQkNDA11KjfhyHB/Jt/+mGCcouBGIUGOt467z2RMZV3rgClGIDPnmUsiVHOak/wt011xzjdq0aaOzZ896df8HDx7UkSNHvLrP86699lrFxcV5dZ916tRxh6ErNVQUFRXp1//9a508ddLr+z7Pl/+mzmOcoOBEIILpXamB6/yYI77Y/5Ue5iTfBjpf/2K+kmuXfP//NmPUH3Vz20Sv79eX/6Z+vH/GCQpOQRWI5s2bp5kzZ6q4uFgdO3bU3LlzdcsttwS6LASYLwOXr/Z/JYc5yT+BTvLNL+YruXbJt/9vz+/b1rDRFfdv6sf7x6UdOXJUxUVFXt+vP0emv5SgCUTvvvuu0tPTNX/+fHXr1k1z5sxR3759tWvXLjVu3DjQ5QHVciWGufP790eg88Uv5iu5dsn3QR1Xp/PTmaxZu0b2z7d6ff9FJ0p/+K8PwlZVBU0gmjVrlh566CENGzZMkjR//nwtX75cb731lp566qkAVwcEpyv5m/6VXDvgqdLS45Kk6AZ11CrO4vX9Vxw6LUkqKCjw+r6rKsQIgsEvzpw5I4vFor///e/q37+/e31qaqqOHj2qDz74oFL706dP6/Tp0+7lY8eOyWazaf/+/YqMjPRqbVFRUZKkUZ0764ZG0V7dtySt3ntA7+XbdXfLBEWH1/PqvguPfq9PD+71yb7Zf+D2zf4Dt+8rff9Xcu1X+v79Vft9bdspNvIar++/pOyE/mH/SqNGjdLUqVO9tt/S0lI1a9ZMR48edf++vSQjCHz77beGJGPjxo2V1o8dO9a45ZZbLmg/adIkQz/MqMGLFy9evHjxusJf+/fv/9msEDSXzDwxfvx4paenu5crKip0+PBhNWzY0OsDx51Pr744+4T/w3H2D46zf3Cc/Ydj7R++Os6GYej48eNVGiYjKAKR1WpVaGioSkpKKq0vKSlRTMyFE6rWrVv3gjE6GjRo4MsSFRkZyT82P+A4+wfH2T84zv7DsfYPXxznn71U9v/V8mqvJhUWFqbOnTtr9erV7nUVFRVavXq1kpKSAlgZAAAwg6A4QyRJ6enpSk1NVZcuXXTLLbdozpw5OnHihPupMwAAELyCJhD95je/0Xfffadnn31WxcXFuummm7RixQo1adIkoHXVrVtXkyZN8ukUCeA4+wvH2T84zv7DsfYPMxznoHjsHgAA4HKC4h4iAACAyyEQAQCAoEcgAgAAQY9ABAAAgh6ByA/mzZunFi1aKDw8XN26ddPnn39+2fbvv/++EhISFB4erhtuuEEffvihnyq9snlynN98803dfvvtuvbaa3Xttdeqd+/eP/v/BT/w9O/zeYsXL1ZISEil+QRxaZ4e56NHjyotLU2xsbGqW7eurr/+en52VJGnx3rOnDlq27atIiIi1KxZM40ZM0anTp3yU7VXnpycHN1zzz2Ki4tTSEiI/vnPf/7se9atW6ebb75ZdevWVevWrZWVleXzOoNiLrNAWrx4sREWFma89dZbxldffWU89NBDRoMGDYySkpKLtv/000+N0NBQ46WXXjJ27txpPPPMM0adOnWML7/80s+VX1k8Pc6//e1vjXnz5hnbt2837Ha7MXToUCMqKso4cOCAnyu/snh6nM8rLCw0rrvuOuP222837r33Xv8UewXz9DifPn3a6
NKli3H33XcbGzZsMAoLC41169YZeXl5fq78yuPpsX7nnXeMunXrGu+8845RWFhorFy50oiNjTXGjBnj58qvHB9++KHx9NNPG0uWLDEkGdnZ2Zdtv2fPHsNisRjp6enGzp07jblz5xqhoaHGihUrfFongcjHbrnlFiMtLc29XF5ebsTFxRnTpk27aPv777/f6NevX6V13bp1M/7whz/4tM4rnafH+afOnTtn1K9f31i4cKGvSrwqVOc4nzt3zujevbvxl7/8xUhNTSUQVYGnxzkjI8No2bKlcebMGX+VeNXw9FinpaUZv/jFLyqtS09PN3r06OHTOq8WVQlETz75pNG+fftK637zm98Yffv29WFlhsElMx86c+aMcnNz1bt3b/e6WrVqqXfv3tq0adNF37Np06ZK7SWpb9++l2yP6h3nn3K5XDp79qyio6N9VeYVr7rH+fnnn1fjxo01YsQIf5R5xavOcV66dKmSkpKUlpamJk2aqEOHDnrxxRdVXl7ur7KvSNU51t27d1dubq77stqePXv04Ycf6u677/ZLzcEgUL8Hg2ak6kBwOp0qLy+/YDTsJk2aKD8//6LvKS4uvmj74uJin9V5pavOcf6pcePGKS4u7oJ/hPg/1TnOGzZs0IIFC5SXl+eHCq8O1TnOe/bs0Zo1azR48GB9+OGHKigo0KOPPqqzZ89q0qRJ/ij7ilSdY/3b3/5WTqdTt912mwzD0Llz5/Twww9rwoQJ/ig5KFzq92BpaalOnjypiIgIn/TLGSIEvenTp2vx4sXKzs5WeHh4oMu5ahw/flxDhgzRm2++KavVGuhyrmoVFRVq3Lix3njjDXXu3Fm/+c1v9PTTT2v+/PmBLu2qs27dOr344ov605/+pG3btmnJkiVavny5pkyZEujSUEOcIfIhq9Wq0NBQlZSUVFpfUlKimJiYi74nJibGo/ao3nE+7+WXX9b06dP1ySef6MYbb/RlmVc8T4/zN998o7179+qee+5xr6uoqJAk1a5dW7t27VKrVq18W/QVqDp/n2NjY1WnTh2Fhoa61yUmJqq4uFhnzpxRWFiYT2u+UlXnWE+cOFFDhgzRgw8+KEm64YYbdOLECY0cOVJPP/20atXiPENNXer3YGRkpM/ODkmcIfKpsLAwde7cWatXr3avq6io0OrVq5WUlHTR9yQlJVVqL0mrVq26ZHtU7zhL0ksvvaQpU6ZoxYoV6tKliz9KvaJ5epwTEhL05ZdfKi8vz/361a9+pV69eikvL0/NmjXzZ/lXjOr8fe7Ro4cKCgrcgVOSvv76a8XGxhKGLqM6x9rlcl0Qes4HUYOpQb0iYL8HfXrLNozFixcbdevWNbKysoydO3caI0eONBo0aGAUFxcbhmEYQ4YMMZ566il3+08//dSoXbu28fLLLxt2u92YNGkSj91XgafHefr06UZYWJjx97//3SgqKnK/jh8/HqiPcEXw9Dj/FE+ZVY2nx9nhcBj169c3Ro0aZezatctYtmyZ0bhxY+OFF14I1Ee4Ynh6rCdNmmTUr1/f+Nvf/mbs2bPH+Pjjj41WrVoZ999/f6A+gukdP37c2L59u7F9+3ZDkjFr1ixj+/btxr59+wzDMIynnnrKGDJkiLv9+cfux44da9jtdmPevHk8dn+1mDt3rmGz2YywsDDjlltuMTZv3uzedscddxipqamV2r/33nvG9ddfb4SFhRnt27c3li9f7ueKr0yeHOfmzZsbki54TZo0yf+FX2E8/fv8YwSiqvP0OG/cuNHo1q2bUbduXaNly5bG1KlTjXPnzvm56iuTJ8f67NmzxuTJk41WrVoZ4eHhRrNmzYxHH33UOHLkiP8Lv0KsXbv2oj9vzx/X1NRU44477rjgPTfddJMRFhZmtGzZ0sjMzPR5nSGGwTk+AAAQ3LiHCAAABD0CEQAACHoEIgAAEPQIRAAAIOgRiAAAQNAjEAEAgKBHIAIAAEGPQAQAAIIegQgAAAQ9AhEAAAh6BCIAA
BD0CEQAACDo/T80ZJFIx7/nWQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.histplot(t_1_test_scaled)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "train_scaled_2, test_scaled_1 = scale_data2(t_1_train, t_1_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.histplot(test_scaled_1)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2875, 25)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t_1_train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1.0" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t_1_train[:, 0].min()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.48529729631535756 0.7426486481576787\n", + "-1.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0019739897816999537 0.0019739897816999537\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.00034835113794705065 0.00034835113794705065\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0015095215977705527 0.0015095215977705527\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n", + "0.008941012540640966 0.008941012540640966\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.006967022758941012 0.006967022758941012\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.00023223409196470042 0.00023223409196470042\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n", + "0.00023223409196470042 0.00023223409196470042\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n", + "0.00011611704598235021 
0.00011611704598235021\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.00011611704598235021 0.00011611704598235021\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n", + "0.0013934045517882026 0.0013934045517882026\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0013934045517882026 0.0013934045517882026\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0006967022758941013 0.0006967022758941013\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n", + "0.0024384579656293545 0.0024384579656293545\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.002206223873664654 0.002206223873664654\n", + "0.0 1.0\n", + "0.0 1.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n", + "0.0 0.0\n", + "0.0 0.0\n", + "0.0 0.0\n", + "-------------\n" + ] + } + ], + "source": [ + "for feature in range(0, 25):\n", + " print(f'{t_1_test[:, feature].mean()} {t_1_test_scaled[:, feature].mean()}')\n", + " print(f'{t_1_test[:, feature].min()} {t_1_test[:, feature].max()}')\n", + " print(f'{t_1_test_scaled[:, feature].min()} {t_1_test_scaled[:, feature].max()}')\n", + " print('-------------')\n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py3.9test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "27efe6010b91a164a18a011cd71b7afbe2f076e5b83b7f8099f414d97e11e710" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/subject1-4/AdaDiff/data/preprocess_smd.ipynb 
b/subject1-4/AdaDiff/data/preprocess_smd.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..0126dc7c7e42968ac267d51d5ad446e744849e0f --- /dev/null +++ b/subject1-4/AdaDiff/data/preprocess_smd.ipynb @@ -0,0 +1,263 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'/root/Diff-Anomaly/TranAD/data'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "smd_dir = '../processed/SMD/'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "84\n", + "(28479, 38)\n", + "normalized: machine-1-1_test.npy 0.0 - 1.0\n", + "(23703, 38)\n", + "normalized: machine-3-2_labels.npy 0.0 - 1.0\n", + "(28743, 38)\n", + "normalized: machine-2-6_train.npy 0.0 - 1.0\n", + "(23693, 38)\n", + "normalized: machine-3-10_test.npy 0.0 - 1.0\n", + "(23700, 38)\n", + "normalized: machine-2-2_labels.npy 0.0 - 1.0\n", + "(23689, 38)\n", + "normalized: machine-1-6_labels.npy 0.0 - 1.0\n", + "(23694, 38)\n", + "normalized: machine-1-2_test.npy 0.0 - 1.0\n", + "(23696, 38)\n", + "normalized: machine-2-7_test.npy 0.0 - 1.0\n", + "(28479, 38)\n", + "normalized: machine-1-1_train.npy 0.0 - 1.0\n", + "(23702, 38)\n", + "normalized: machine-1-3_train.npy 0.0 - 1.0\n", + "(28696, 38)\n", + "normalized: machine-3-11_test.npy 0.0 - 1.0\n", + "(28700, 38)\n", + "normalized: machine-3-1_labels.npy 0.0 - 1.0\n", + "(28705, 38)\n", + "normalized: machine-3-7_test.npy 0.0 - 1.0\n", + "(28713, 38)\n", + "normalized: machine-3-9_test.npy 0.0 - 1.0\n", + "(28695, 38)\n", + "normalized: 
machine-3-11_train.npy 0.0 - 1.0\n", + "(23688, 38)\n", + "normalized: machine-2-3_train.npy 0.0 - 1.0\n", + "(23689, 38)\n", + "normalized: machine-2-4_labels.npy 0.0 - 1.0\n", + "(23699, 38)\n", + "normalized: machine-1-8_test.npy 0.0 - 1.0\n", + "(23694, 38)\n", + "normalized: machine-2-1_labels.npy 0.0 - 1.0\n", + "(23706, 38)\n", + "normalized: machine-1-4_train.npy 0.0 - 1.0\n", + "(23702, 38)\n", + "normalized: machine-3-2_train.npy 0.0 - 1.0\n", + "(23694, 38)\n", + "normalized: machine-1-2_train.npy 0.0 - 1.0\n", + "(28705, 38)\n", + "normalized: machine-3-7_train.npy 0.0 - 1.0\n", + "(23703, 38)\n", + "normalized: machine-3-3_test.npy 0.0 - 1.0\n", + "(23707, 38)\n", + "normalized: machine-1-4_labels.npy 0.0 - 1.0\n", + "(28743, 38)\n", + "normalized: machine-2-6_test.npy 0.0 - 1.0\n", + "(28479, 38)\n", + "normalized: machine-1-1_labels.npy 0.0 - 1.0\n", + "(23699, 38)\n", + "normalized: machine-2-2_train.npy 0.0 - 1.0\n", + "(28704, 38)\n", + "normalized: machine-3-8_test.npy 0.0 - 1.0\n", + "(23689, 38)\n", + "normalized: machine-1-6_test.npy 0.0 - 1.0\n", + "(23698, 38)\n", + "normalized: machine-1-8_train.npy 0.0 - 1.0\n", + "(28722, 38)\n", + "normalized: machine-2-9_labels.npy 0.0 - 1.0\n", + "(23699, 38)\n", + "normalized: machine-1-8_labels.npy 0.0 - 1.0\n", + "(23706, 38)\n", + "normalized: machine-1-5_labels.npy 0.0 - 1.0\n", + "(23687, 38)\n", + "normalized: machine-3-4_train.npy 0.0 - 1.0\n", + "(23689, 38)\n", + "normalized: machine-2-5_labels.npy 0.0 - 1.0\n", + "(28726, 38)\n", + "normalized: machine-3-6_labels.npy 0.0 - 1.0\n", + "(23693, 38)\n", + "normalized: machine-3-10_labels.npy 0.0 - 1.0\n", + "(23697, 38)\n", + "normalized: machine-1-7_train.npy 0.0 - 1.0\n", + "(23690, 38)\n", + "normalized: machine-3-5_train.npy 0.0 - 1.0\n", + "(23694, 38)\n", + "normalized: machine-2-1_test.npy 0.0 - 1.0\n", + "(23703, 38)\n", + "normalized: machine-2-8_test.npy 0.0 - 1.0\n", + "(23689, 38)\n", + "normalized: machine-2-3_labels.npy 0.0 - 
1.0\n", + "(23689, 38)\n", + "normalized: machine-2-3_test.npy 0.0 - 1.0\n", + "(23691, 38)\n", + "normalized: machine-3-5_labels.npy 0.0 - 1.0\n", + "(23689, 38)\n", + "normalized: machine-2-5_test.npy 0.0 - 1.0\n", + "(23687, 38)\n", + "normalized: machine-3-4_labels.npy 0.0 - 1.0\n", + "(23703, 38)\n", + "normalized: machine-2-8_labels.npy 0.0 - 1.0\n", + "(23706, 38)\n", + "normalized: machine-1-5_test.npy 0.0 - 1.0\n", + "(23692, 38)\n", + "normalized: machine-3-10_train.npy 0.0 - 1.0\n", + "(23697, 38)\n", + "normalized: machine-1-7_labels.npy 0.0 - 1.0\n", + "(28704, 38)\n", + "normalized: machine-3-8_labels.npy 0.0 - 1.0\n", + "(23700, 38)\n", + "normalized: machine-2-2_test.npy 0.0 - 1.0\n", + "(28700, 38)\n", + "normalized: machine-3-1_test.npy 0.0 - 1.0\n", + "(28705, 38)\n", + "normalized: machine-3-7_labels.npy 0.0 - 1.0\n", + "(28722, 38)\n", + "normalized: machine-2-9_test.npy 0.0 - 1.0\n", + "(28726, 38)\n", + "normalized: machine-3-6_train.npy 0.0 - 1.0\n", + "(23688, 38)\n", + "normalized: machine-1-6_train.npy 0.0 - 1.0\n", + "(23696, 38)\n", + "normalized: machine-2-7_labels.npy 0.0 - 1.0\n", + "(23703, 38)\n", + "normalized: machine-3-3_labels.npy 0.0 - 1.0\n", + "(23689, 38)\n", + "normalized: machine-2-4_train.npy 0.0 - 1.0\n", + "(23694, 38)\n", + "normalized: machine-1-2_labels.npy 0.0 - 1.0\n", + "(23705, 38)\n", + "normalized: machine-1-5_train.npy 0.0 - 1.0\n", + "(23689, 38)\n", + "normalized: machine-2-4_test.npy 0.0 - 1.0\n", + "(23703, 38)\n", + "normalized: machine-1-3_test.npy 0.0 - 1.0\n", + "(23687, 38)\n", + "normalized: machine-3-4_test.npy 0.0 - 1.0\n", + "(23693, 38)\n", + "normalized: machine-2-1_train.npy 0.0 - 1.0\n", + "(23707, 38)\n", + "normalized: machine-1-4_test.npy 0.0 - 1.0\n", + "(28713, 38)\n", + "normalized: machine-3-9_train.npy 0.0 - 1.0\n", + "(23696, 38)\n", + "normalized: machine-2-7_train.npy 0.0 - 1.0\n", + "(23703, 38)\n", + "normalized: machine-3-2_test.npy 0.0 - 1.0\n", + "(23691, 38)\n", + 
"normalized: machine-3-5_test.npy 0.0 - 1.0\n", + "(28703, 38)\n", + "normalized: machine-3-8_train.npy 0.0 - 1.0\n", + "(28713, 38)\n", + "normalized: machine-3-9_labels.npy 0.0 - 1.0\n", + "(23702, 38)\n", + "normalized: machine-2-8_train.npy 0.0 - 1.0\n", + "(28726, 38)\n", + "normalized: machine-3-6_test.npy 0.0 - 1.0\n", + "(23697, 38)\n", + "normalized: machine-1-7_test.npy 0.0 - 1.0\n", + "(28700, 38)\n", + "normalized: machine-3-1_train.npy 0.0 - 1.0\n", + "(23703, 38)\n", + "normalized: machine-1-3_labels.npy 0.0 - 1.0\n", + "(28722, 38)\n", + "normalized: machine-2-9_train.npy 0.0 - 1.0\n", + "(28696, 38)\n", + "normalized: machine-3-11_labels.npy 0.0 - 1.0\n", + "(23688, 38)\n", + "normalized: machine-2-5_train.npy 0.0 - 1.0\n", + "(23703, 38)\n", + "normalized: machine-3-3_train.npy 0.0 - 1.0\n", + "(28743, 38)\n", + "normalized: machine-2-6_labels.npy 0.0 - 1.0\n" + ] + } + ], + "source": [ + "files = os.listdir(smd_dir)\n", + "print(len(files))\n", + "for file in files:\n", + " sample = np.load(smd_dir + file)\n", + " max = sample.max()\n", + " min = sample.min()\n", + " print(sample.shape)\n", + " if min == 0 and max == 1:\n", + " print(f'normalized: {file} {min} - {max}')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py3.9test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "27efe6010b91a164a18a011cd71b7afbe2f076e5b83b7f8099f414d97e11e710" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/subject1-4/AdaDiff/data/preprocess_synthetic.ipynb b/subject1-4/AdaDiff/data/preprocess_synthetic.ipynb new file mode 100644 index 
0000000000000000000000000000000000000000..a079c7539434efb10a6a503e5ec32d0bb9494d5b --- /dev/null +++ b/subject1-4/AdaDiff/data/preprocess_synthetic.ipynb @@ -0,0 +1,850 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": {}, + "outputs": [], + "source": [ + "DATASET = 'pattern_trend10'\n", + "ORIG = 'trend'\n", + "FILE = 'trend10'" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [], + "source": [ + "def abs_normalize(a):\n", + " a = a / np.maximum(np.absolute(a.max(axis=0)), np.absolute(a.min(axis=0)))\n", + " a = np.nan_to_num(a)\n", + " return (a / 2 + 0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/.conda/envs/py3.9test/lib/python3.9/site-packages/scipy/__init__.py:138: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.1)\n", + " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion} is required for this version of \"\n" + ] + } + ], + "source": [ + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "def scale_data(train, test, validation):\n", + " scaler = MinMaxScaler(feature_range=(0, 1), clip=True).fit(train)\n", + "\n", + " train_scaled = scaler.transform(train)\n", + " test_scaled = scaler.transform(test)\n", + " validation_scaled = scaler.transform(validation)\n", + "\n", + " return train_scaled, test_scaled, validation_scaled" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col_0col_1col_2col_3col_4anomaly
0-0.1312322.6078500.0660114.381993-1.8944490
10.3987362.4039650.4427014.554839-1.5307900
20.8091082.1706900.7025473.998227-1.4148470
31.0078881.8746751.0067003.990737-0.9104900
41.3400911.3111061.3379473.413961-0.7203440
\n", + "
" + ], + "text/plain": [ + " col_0 col_1 col_2 col_3 col_4 anomaly\n", + "0 -0.131232 2.607850 0.066011 4.381993 -1.894449 0\n", + "1 0.398736 2.403965 0.442701 4.554839 -1.530790 0\n", + "2 0.809108 2.170690 0.702547 3.998227 -1.414847 0\n", + "3 1.007888 1.874675 1.006700 3.990737 -0.910490 0\n", + "4 1.340091 1.311106 1.337947 3.413961 -0.720344 0" + ] + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time_series = pd.read_csv(f'{FILE}_train_test_val.csv')\n", + "time_series.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col_0col_1col_2col_3col_4anomaly
0-0.1312322.6078500.0660114.381993-1.8944490
10.3987362.4039650.4427014.554839-1.5307900
20.8091082.1706900.7025473.998227-1.4148470
31.0078881.8746751.0067003.990737-0.9104900
41.3400911.3111061.3379473.413961-0.7203440
\n", + "
" + ], + "text/plain": [ + " col_0 col_1 col_2 col_3 col_4 anomaly\n", + "0 -0.131232 2.607850 0.066011 4.381993 -1.894449 0\n", + "1 0.398736 2.403965 0.442701 4.554839 -1.530790 0\n", + "2 0.809108 2.170690 0.702547 3.998227 -1.414847 0\n", + "3 1.007888 1.874675 1.006700 3.990737 -0.910490 0\n", + "4 1.340091 1.311106 1.337947 3.413961 -0.720344 0" + ] + }, + "execution_count": 150, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_time_series = time_series[:20000]\n", + "train_time_series.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-15.097620609417833\n", + "85.04304598705383\n" + ] + } + ], + "source": [ + "print(train_time_series['col_4'].min())\n", + "print(train_time_series['col_4'].max())" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col_0col_1col_2col_3col_4anomaly
200000.1134802.5419600.0024184.4137782.5139330
200010.4253302.3527050.4029864.3433632.8619540
200020.5934772.3853190.7958604.2505153.3436780
200030.9907081.6829741.0200783.9477633.4182430
200041.3283721.4523741.2582303.1268273.6719270
\n", + "
" + ], + "text/plain": [ + " col_0 col_1 col_2 col_3 col_4 anomaly\n", + "20000 0.113480 2.541960 0.002418 4.413778 2.513933 0\n", + "20001 0.425330 2.352705 0.402986 4.343363 2.861954 0\n", + "20002 0.593477 2.385319 0.795860 4.250515 3.343678 0\n", + "20003 0.990708 1.682974 1.020078 3.947763 3.418243 0\n", + "20004 1.328372 1.452374 1.258230 3.126827 3.671927 0" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_time_series = time_series[20000:40000]\n", + "test_time_series.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
col_0col_1col_2col_3col_4anomaly
40000-0.0948022.6521710.0189394.550506-6.4912400
400010.3597502.2560460.3312344.466656-6.1598940
400020.8235032.1833440.8220094.316569-5.6821000
400031.0802651.8167901.0566593.858038-5.4638390
400041.2489271.3742571.2385673.136138-5.1982220
\n", + "
" + ], + "text/plain": [ + " col_0 col_1 col_2 col_3 col_4 anomaly\n", + "40000 -0.094802 2.652171 0.018939 4.550506 -6.491240 0\n", + "40001 0.359750 2.256046 0.331234 4.466656 -6.159894 0\n", + "40002 0.823503 2.183344 0.822009 4.316569 -5.682100 0\n", + "40003 1.080265 1.816790 1.056659 3.858038 -5.463839 0\n", + "40004 1.248927 1.374257 1.238567 3.136138 -5.198222 0" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val_time_series = time_series[40000:]\n", + "val_time_series.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "train_labels = train_time_series.anomaly.to_numpy()\n", + "test_labels = test_time_series.anomaly.to_numpy()\n", + "val_labels = val_time_series.anomaly.to_numpy()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train (20000, 5)\n", + "Test: (20000, 5)\n", + "Val: (10000, 5)\n" + ] + } + ], + "source": [ + "train_no_labels = train_time_series.drop(['anomaly'], axis=1).to_numpy()\n", + "test_no_labels = test_time_series.drop(['anomaly'], axis=1).to_numpy()\n", + "val_no_labels = val_time_series.drop(['anomaly'], axis=1).to_numpy()\n", + "print('Train ', train_no_labels.shape)\n", + "print('Test: ', test_no_labels.shape)\n", + "print('Val: ', val_no_labels.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train ratio: 1.01\n", + "Val ratio: 0.8999999999999999\n" + ] + } + ], + "source": [ + "train_ratio = (train_labels.sum() / train_labels.shape[0]) * 100\n", + "val_ratio = (val_labels.sum() / val_labels.shape[0]) * 100\n", + "print('Train ratio: ', train_ratio)\n", + "print('Val ratio: ', val_ratio)" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "normalized dataset\n" + ] + } + ], + "source": [ + "if ORIG != 'pattern_trend': \n", + " train_normalized = abs_normalize(train_no_labels)\n", + " val_normalized = abs_normalize(val_no_labels)\n", + " test_normalized = abs_normalize(test_no_labels)\n", + " print('normalized dataset')" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "def expand_labels(data, labels):\n", + " labels_reshaped = np.zeros_like(data)\n", + " for idx in range(0, len(labels)):\n", + " if labels[idx]:\n", + " labels_reshaped[idx][0:labels_reshaped.shape[1]] = 1\n", + " return labels_reshaped" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "# val_labels_reshaped = expand_labels(val_normalized, val_labels)\n", + "# test_labels_reshaped = expand_labels(test_normalized, test_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# FOR RATIO ABLATION\n", + "#test5 = np.load(f'../processed/{ORIG}/test.npy')\n", + "#labels5 = np.load(f'../processed/{ORIG}/labels.npy')" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "def scale_test(test):\n", + " scaler = MinMaxScaler(feature_range=(0, 1)).fit(test)\n", + "\n", + " #train_scaled = scaler.transform(train)\n", + " test_scaled = scaler.transform(test)\n", + "\n", + " return test_scaled" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [], + "source": [ + "trend5_series = pd.read_csv('trend_train_test_val.csv')\n", + "test_trend5 = trend5_series[20000:40000]\n", + "#labels5 = test_trend5.anomaly.to_numpy()\n", + "#test_trend5 = test_trend5.drop(['anomaly'], axis=1).to_numpy()\n", + "#labels5 = 
expand_labels(test_trend5, labels5)" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-97.97744390787436\n", + "-1.0244058283630473\n" + ] + } + ], + "source": [ + "print(test_trend5['col_4'].min())\n", + "print(test_trend5['col_4'].max())" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "#ratio = 1\n", + "#ds = f'pattern_trend{ratio}_test5'\n", + "#test5_normalized = scale_test(train_no_labels, test_trend5)\n", + "test5_normalized = scale_test(test_trend5)\n", + "#np.save(f'../processed/{ds}/test.npy', normalized_test_for_ds)\n", + "#np.save(f'../processed/{ds}/labels.npy', labels_5_expanded)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "normalized trend\n" + ] + } + ], + "source": [ + "# if ORIG == 'pattern_trend':\n", + "# train_normalized, test5_normalized, val_normalized = scale_data(train_no_labels, test5, val_no_labels)\n", + "# print('normalized trend')" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "val_labels_reshaped = expand_labels(val_normalized, val_labels)\n", + "#test_labels_reshaped = expand_labels(test_normalized, test_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "saved trend\n" + ] + } + ], + "source": [ + "import os \n", + "os.makedirs(f'../processed/{DATASET}_test5', exist_ok=True)\n", + "# np.save(f'../processed/{DATASET}_test5/train.npy', train_normalized)\n", + "if ORIG != 'pattern_trend' and ORIG != 'trend':\n", + " np.save(f'../processed/{DATASET}_test5/test.npy', test5)\n", + " print('saved NOT trend')\n", + "else:\n", + " 
np.save(f'../processed/{DATASET}_test5/test.npy', test5_normalized)\n", + " print('saved trend')\n", + "np.save(f'../processed/{DATASET}_test5/labels.npy', labels5)\n", + "# np.save(f'../processed/{DATASET}_test5/validation.npy', val_normalized)\n", + "# np.save(f'../processed/{DATASET}_test5/labels_validation.npy', val_labels_reshaped)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(20000, 5)" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels_5_reshaped.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "train = np.load('../processed/pattern_trend15_test5/train.npy')" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.plot(train[:, 4])" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.plot(train[:, 4])" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "np.save(f'../processed/{DATASET}_test5/labels.npy', labels5)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py3.9test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/subject1-4/AdaDiff/data/split_val_swat.ipynb b/subject1-4/AdaDiff/data/split_val_swat.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..13168f57b690953f47501c09e65a6617c218106f --- /dev/null +++ b/subject1-4/AdaDiff/data/split_val_swat.ipynb @@ -0,0 +1,1863 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#normal0 = pd.read_excel('SWaT_Dataset_Normal_v0.xlsx')\n", + "normal1 = pd.read_excel('SWaT_Dataset_Normal_v1.xlsx')\n", + "attack = pd.read_excel('SWaT_Dataset_Attack_v0.xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "#if not os.path.isfile(directory + 'train.csv'):\n", + "#normal0.to_csv('train.csv', index=None, header=None)\n", + "normal1.to_csv('train1.csv', index=None, header=None)\n", + "attack.to_csv('test.csv', index=None, header=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + 
"metadata": {}, + "outputs": [], + "source": [ + "test = pd.read_csv('test.csv')\n", + "train1 = pd.read_csv('train1.csv')\n", + "#train = pd.read_csv('train.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(496800, 53)" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(495000, 53)" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train1.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(449919, 53)" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [], + "source": [ + "train1['Normal/Attack'] = [0 if x == 'Normal' else 1 for x in train1['Normal/Attack']]\n", + "test['Normal/Attack'] = [0 if x == 'Normal' else 1 for x in test['Normal/Attack']]" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 395298\n", + "1 54621\n", + "Name: Normal/Attack, dtype: int64" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['Normal/Attack'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 495000\n", + "Name: Normal/Attack, dtype: int64" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train1['Normal/Attack'].value_counts()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 85, + "metadata": {}, + "outputs": [], + "source": [ + "# trim column names\n", + "train1 = train1.rename(columns=lambda x: x.strip())\n", + "test = test.rename(columns=lambda x: x.strip())" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "449914 0\n", + "449915 0\n", + "449916 0\n", + "449917 0\n", + "449918 0\n", + "Name: Normal/Attack, Length: 449919, dtype: int64" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_labels = test['Normal/Attack']\n", + "test_labels" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "def search_ratio(test_labels, val_len):\n", + " test = test_labels[val_len:]\n", + " val = test_labels[:val_len]\n", + " test_ratio = (np.sum(test) /test.shape[0]) * 100\n", + " val_ratio = (np.sum(val) / val.shape[0]) * 100\n", + " print(f'val ratio: {val_ratio}')\n", + " print(f'test ratio: {test_ratio}')\n", + " print('----')\n", + " return val_ratio, test_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "val ratio: 5.764422168631852\n", + "test ratio: 13.734108285917495\n", + "----\n", + "val ratio: 9.926429730390522\n", + "test ratio: 12.386152599968389\n", + "----\n", + "val ratio: 6.432302278199667\n", + "test ratio: 14.586402662060557\n", + "----\n" + ] + } + ], + "source": [ + "vr, tr = search_ratio(test_labels=test_labels.to_numpy(), val_len=int(0.2 * test.shape[0]))\n", + "vr, tr = search_ratio(test_labels=test_labels.to_numpy(), val_len=int(0.1 * test.shape[0]))\n", + "vr, tr = search_ratio(test_labels=test_labels.to_numpy(), val_len=int(0.3 * test.shape[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "44991" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val_len = int(0.1 * test.shape[0])\n", + "val_len" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(44991, 53)" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation = test[:val_len]\n", + "validation.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 40525\n", + "1 4466\n", + "Name: Normal/Attack, dtype: int64" + ] + }, + "execution_count": 91, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation['Normal/Attack'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + " ..\n", + "44986 0\n", + "44987 0\n", + "44988 0\n", + "44989 0\n", + "44990 0\n", + "Name: Normal/Attack, Length: 44991, dtype: int64" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation_labels = validation['Normal/Attack']\n", + "validation_labels" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(404928, 53)" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_clipped = test[val_len:]\n", + "test_clipped.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(404928,)" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_labels_clipped = 
test_labels[val_len:]\n", + "test_labels_clipped.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.rcParams['figure.figsize'] = 30, 2\n", + "plt.plot(test_clipped.to_numpy()[:, 4])\n", + "plt.fill_between(np.arange(test_labels_clipped.to_numpy().shape[0]), test_labels_clipped.to_numpy(), color='red', alpha=0.7, linestyle='dashed', linewidth=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(validation.to_numpy()[:, 4])\n", + "plt.fill_between(np.arange(validation_labels.to_numpy().shape[0]), validation_labels.to_numpy(), color='red', alpha=0.7, linestyle='dashed', linewidth=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(train_values[:, 4])" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "# we don't need timestamps or training labels\n", + "train_dropped = train1.drop([\"Timestamp\" , \"Normal/Attack\" ] , axis = 1)\n", + "test_dropped = test_clipped.drop([\"Timestamp\" , \"Normal/Attack\" ] , axis = 1)\n", + "validation_dropped = validation.drop([\"Timestamp\" , \"Normal/Attack\" ] , axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FIT101LIT101MV101P101P102AIT201AIT202AIT203FIT201MV201...FIT504P501P502PIT501PIT502PIT503FIT601P601P602P603
00.0124.3135111251.92268.313446312.79160.01...0.0119.1002310.03.34850.000256111
10.0124.3920111251.92268.313446312.79160.01...0.0119.1002310.03.34850.000256111
20.0124.4705111251.92268.313446312.79160.01...0.0119.1002310.03.34850.000256111
30.0124.6668111251.92268.313446312.79160.01...0.0119.1002310.03.34850.000256111
40.0124.5098111251.92268.313446312.79160.01...0.0119.1002310.03.34850.000256111
\n", + "

5 rows × 51 columns

\n", + "
" + ], + "text/plain": [ + " FIT101 LIT101 MV101 P101 P102 AIT201 AIT202 AIT203 FIT201 \\\n", + "0 0.0 124.3135 1 1 1 251.9226 8.313446 312.7916 0.0 \n", + "1 0.0 124.3920 1 1 1 251.9226 8.313446 312.7916 0.0 \n", + "2 0.0 124.4705 1 1 1 251.9226 8.313446 312.7916 0.0 \n", + "3 0.0 124.6668 1 1 1 251.9226 8.313446 312.7916 0.0 \n", + "4 0.0 124.5098 1 1 1 251.9226 8.313446 312.7916 0.0 \n", + "\n", + " MV201 ... FIT504 P501 P502 PIT501 PIT502 PIT503 FIT601 P601 \\\n", + "0 1 ... 0.0 1 1 9.100231 0.0 3.3485 0.000256 1 \n", + "1 1 ... 0.0 1 1 9.100231 0.0 3.3485 0.000256 1 \n", + "2 1 ... 0.0 1 1 9.100231 0.0 3.3485 0.000256 1 \n", + "3 1 ... 0.0 1 1 9.100231 0.0 3.3485 0.000256 1 \n", + "4 1 ... 0.0 1 1 9.100231 0.0 3.3485 0.000256 1 \n", + "\n", + " P602 P603 \n", + "0 1 1 \n", + "1 1 1 \n", + "2 1 1 \n", + "3 1 1 \n", + "4 1 1 \n", + "\n", + "[5 rows x 51 columns]" + ] + }, + "execution_count": 98, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_dropped.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FIT101LIT101MV101P101P102AIT201AIT202AIT203FIT201MV201...FIT504P501P502PIT501PIT502PIT503FIT601P601P602P603
449912.665983533.0524211258.01088.381697330.73572.4411622...0.30458321247.62881.025214186.61880.000064111
449922.644525532.3066211258.01088.381697330.73572.4411622...0.30458321247.54871.025214186.61880.000064111
449932.621785532.1497211258.01088.381697330.73572.4360360...0.30458321247.54871.025214186.53870.000064111
449942.613778531.5216211258.01088.381697330.73572.3386410...0.30458321247.54871.025214186.52270.000064111
449952.613778531.4038211258.01088.381697330.73572.2215100...0.30458321247.54871.025214186.52270.000064111
\n", + "

5 rows × 51 columns

\n", + "
" + ], + "text/plain": [ + " FIT101 LIT101 MV101 P101 P102 AIT201 AIT202 AIT203 \\\n", + "44991 2.665983 533.0524 2 1 1 258.0108 8.381697 330.7357 \n", + "44992 2.644525 532.3066 2 1 1 258.0108 8.381697 330.7357 \n", + "44993 2.621785 532.1497 2 1 1 258.0108 8.381697 330.7357 \n", + "44994 2.613778 531.5216 2 1 1 258.0108 8.381697 330.7357 \n", + "44995 2.613778 531.4038 2 1 1 258.0108 8.381697 330.7357 \n", + "\n", + " FIT201 MV201 ... FIT504 P501 P502 PIT501 PIT502 \\\n", + "44991 2.441162 2 ... 0.304583 2 1 247.6288 1.025214 \n", + "44992 2.441162 2 ... 0.304583 2 1 247.5487 1.025214 \n", + "44993 2.436036 0 ... 0.304583 2 1 247.5487 1.025214 \n", + "44994 2.338641 0 ... 0.304583 2 1 247.5487 1.025214 \n", + "44995 2.221510 0 ... 0.304583 2 1 247.5487 1.025214 \n", + "\n", + " PIT503 FIT601 P601 P602 P603 \n", + "44991 186.6188 0.000064 1 1 1 \n", + "44992 186.6188 0.000064 1 1 1 \n", + "44993 186.5387 0.000064 1 1 1 \n", + "44994 186.5227 0.000064 1 1 1 \n", + "44995 186.5227 0.000064 1 1 1 \n", + "\n", + "[5 rows x 51 columns]" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_dropped.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
FIT101LIT101MV101P101P102AIT201AIT202AIT203FIT201MV201...FIT504P501P502PIT501PIT502PIT503FIT601P601P602P603
02.427057522.8467221262.01618.396437328.63372.4453912...0.30778621250.86521.649953189.59880.000128111
12.446274522.8860221262.01618.396437328.63372.4453912...0.30778621250.86521.649953189.67890.000128111
22.489191522.8467221262.01618.394514328.63372.4423162...0.30861921250.88121.649953189.67890.000128111
32.534350522.9645221262.01618.394514328.63372.4423162...0.30861921250.88121.649953189.61480.000128111
42.569260523.4748221262.01618.394514328.63372.4430852...0.30861921250.88121.649953189.50270.000128111
\n", + "

5 rows × 51 columns

\n", + "
" + ], + "text/plain": [ + " FIT101 LIT101 MV101 P101 P102 AIT201 AIT202 AIT203 \\\n", + "0 2.427057 522.8467 2 2 1 262.0161 8.396437 328.6337 \n", + "1 2.446274 522.8860 2 2 1 262.0161 8.396437 328.6337 \n", + "2 2.489191 522.8467 2 2 1 262.0161 8.394514 328.6337 \n", + "3 2.534350 522.9645 2 2 1 262.0161 8.394514 328.6337 \n", + "4 2.569260 523.4748 2 2 1 262.0161 8.394514 328.6337 \n", + "\n", + " FIT201 MV201 ... FIT504 P501 P502 PIT501 PIT502 PIT503 \\\n", + "0 2.445391 2 ... 0.307786 2 1 250.8652 1.649953 189.5988 \n", + "1 2.445391 2 ... 0.307786 2 1 250.8652 1.649953 189.6789 \n", + "2 2.442316 2 ... 0.308619 2 1 250.8812 1.649953 189.6789 \n", + "3 2.442316 2 ... 0.308619 2 1 250.8812 1.649953 189.6148 \n", + "4 2.443085 2 ... 0.308619 2 1 250.8812 1.649953 189.5027 \n", + "\n", + " FIT601 P601 P602 P603 \n", + "0 0.000128 1 1 1 \n", + "1 0.000128 1 1 1 \n", + "2 0.000128 1 1 1 \n", + "3 0.000128 1 1 1 \n", + "4 0.000128 1 1 1 \n", + "\n", + "[5 rows x 51 columns]" + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation_dropped.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [], + "source": [ + "# Transform all columns into float64\n", + "for i in list(train_dropped):\n", + " train_dropped[i]=train_dropped[i].apply(lambda x: str(x).replace(\",\" , \".\"))\n", + "train_dropped = train_dropped.astype(float)\n", + "\n", + "for i in list(test_dropped):\n", + " test_dropped[i]=test_dropped[i].apply(lambda x: str(x).replace(\",\" , \".\"))\n", + "test_dropped = test_dropped.astype(float)\n", + "\n", + "for i in list(validation_dropped):\n", + " validation_dropped[i]=validation_dropped[i].apply(lambda x: str(x).replace(\",\" , \".\"))\n", + "validation_dropped = validation_dropped.astype(float)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "train_values = 
train_dropped.values\n", + "test_values = test_dropped.values\n", + "validation_values = validation_dropped.values" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train min: 0.0\n", + "Train max: 1014.724\n", + "---\n", + "Test min: 0.0\n", + "Test max: 1201.0\n", + "---\n", + "Validation min: 0.0\n", + "Validation max: 1200.0\n" + ] + } + ], + "source": [ + "print(f'Train min: {train_values.min()}')\n", + "print(f'Train max: {train_values.max()}')\n", + "print('---')\n", + "print(f'Test min: {test_values.min()}')\n", + "print(f'Test max: {test_values.max()}')\n", + "print('---')\n", + "print(f'Validation min: {validation_values.min()}')\n", + "print(f'Validation max: {validation_values.max()}')" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0. , 189.8263 , 0. , 1. , 1. ,\n", + " 168.0338 , 8.366317, 285.3371 , 0. , 0. ,\n", + " 1. , 1. , 1. , 1. , 1. ,\n", + " 1. , 0. , 0. , 364.3863 , 0. ,\n", + " 0. , 0. , 0. , 1. , 1. ,\n", + " 148.7599 , 140.8357 , 0. , 243.0146 , 1. ,\n", + " 1. , 1. , 1. , 1. , 7.432902,\n", + " 129.8385 , 244.8731 , 9.536016, 0. , 0. ,\n", + " 0. , 0. , 1. , 1. , 9.468726,\n", + " 0. , 3.14022 , 0. , 1. , 1. ,\n", + " 1. 
])" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_values.min(axis=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(495000, 51)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_values.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler\n", + "\n", + "def scale_data(train, test, validation):\n", + " scaler = MinMaxScaler(feature_range=(0, 1), clip=True).fit(train)\n", + " #scaler = MaxAbsScaler().fit(train)\n", + "\n", + " train_scaled = scaler.transform(train)\n", + " test_scaled = scaler.transform(test)\n", + " validation_scaled = scaler.transform(validation)\n", + "\n", + " # train_scaled = scaler.fit_transform(train)\n", + " # validation_scaled = scaler.fit_transform(validation)\n", + " # test_scaled = scaler.fit_transform(test)\n", + "\n", + " return train_scaled, test_scaled, validation_scaled" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "train_norm, test_norm, validation_norm = scale_data(train_values, test_values, validation_values)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-----Dim 0----\n", + "TRAIN original: 0.0, 2.745092\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 2.760145\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 2.698972\n", + "VAL norm: 0.0, 0.9831991058951758\n", + "-----Dim 1----\n", + "TRAIN original: 120.6237, 817.5565\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 189.8263, 888.1722\n", + "TEST norm: 0.09929594359743149, 1.0\n", + "VAL original: 488.0688, 925.0323\n", + "VAL norm: 
0.5272317503208344, 1.0\n", + "-----Dim 2----\n", + "TRAIN original: 0.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 3----\n", + "TRAIN original: 1.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 4----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 5----\n", + "TRAIN original: 251.6662, 272.5263\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 168.0338, 267.7198\n", + "TEST norm: 0.0, 0.7695840384274284\n", + "VAL original: 257.8826, 266.0856\n", + "VAL norm: 0.2980043240444683, 0.6912430908768403\n", + "-----Dim 6----\n", + "TRAIN original: 8.258652, 8.988273\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 8.366317, 8.73321\n", + "TEST norm: 0.14756291279993405, 0.6504171343752443\n", + "VAL original: 6.0, 8.470778\n", + "VAL norm: 0.0, 0.2907345046263732\n", + "-----Dim 7----\n", + "TRAIN original: 312.2789, 567.4699\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 285.3371, 384.4655\n", + "TEST norm: 0.0, 0.2828728285872151\n", + "VAL original: 314.8423, 336.8111\n", + "VAL norm: 0.010045025098847526, 0.09613270060464507\n", + "-----Dim 8----\n", + "TRAIN original: 0.0, 2.487938\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 2.826899\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 2.804857\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 9----\n", + "TRAIN original: 0.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 10----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 
1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 11----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 1.0\n", + "TEST norm: 0.0, 0.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 12----\n", + "TRAIN original: 1.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 13----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 14----\n", + "TRAIN original: 1.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 15----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 16----\n", + "TRAIN original: 0.0, 21.0993\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 45.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.006402459, 45.0\n", + "VAL norm: 0.00030344414269667716, 1.0\n", + "-----Dim 17----\n", + "TRAIN original: 0.0, 2.358774\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 2.376197\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 2.352112\n", + "VAL norm: 0.0, 0.9971756514189151\n", + "-----Dim 18----\n", + "TRAIN original: 132.8185, 1014.724\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 364.3863, 1201.0\n", + "TEST norm: 0.26257665929059293, 1.0\n", + "VAL original: 732.1039, 1200.0\n", + "VAL norm: 0.6795347120524817, 1.0\n", + "-----Dim 19----\n", + "TRAIN original: 0.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 20----\n", + "TRAIN original: 0.0, 2.0\n", + "TRAIN norm: 
0.0, 1.0\n", + "TEST original: 0.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 21----\n", + "TRAIN original: 0.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 22----\n", + "TRAIN original: 0.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 23----\n", + "TRAIN original: 1.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 1.0, 1.0\n", + "TEST norm: 0.0, 0.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 24----\n", + "TRAIN original: 1.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 25----\n", + "TRAIN original: 0.0, 148.8561\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 148.7599, 148.8561\n", + "TEST norm: 0.9993537382747499, 1.0\n", + "VAL original: 148.7695, 148.8561\n", + "VAL norm: 0.9994182300893278, 1.0\n", + "-----Dim 26----\n", + "TRAIN original: 153.7811, 235.7088\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 140.8357, 333.8118\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 151.961, 297.3853\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 27----\n", + "TRAIN original: 0.0, 1.747862\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 1.744914\n", + "TEST norm: 0.0, 0.998313367989006\n", + "VAL original: 0.0, 1.743504\n", + "VAL norm: 0.0, 0.9975066681465699\n", + "-----Dim 28----\n", + "TRAIN original: 130.3896, 1003.935\n", + "TRAIN norm: 0.0, 0.9999999999999999\n", + "TEST original: 243.0146, 1002.781\n", + "TEST norm: 0.12892861664659905, 0.9986789467382003\n", + "VAL original: 572.6224, 1001.935\n", + "VAL norm: 0.5062505051254348, 0.9977104796155986\n", + "-----Dim 29----\n", + "TRAIN original: 1.0, 1.0\n", + 
"TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 1.0\n", + "TEST norm: 0.0, 0.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 30----\n", + "TRAIN original: 1.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 31----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 32----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 1.0\n", + "TEST norm: 0.0, 0.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 33----\n", + "TRAIN original: 1.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 34----\n", + "TRAIN original: 7.411433, 7.925084\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 7.432902, 8.307037\n", + "TEST norm: 0.04179686207171862, 1.0\n", + "VAL original: 7.831518, 8.219559\n", + "VAL norm: 0.8178412969117161, 1.0\n", + "-----Dim 35----\n", + "TRAIN original: 142.3481, 218.3286\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 129.8385, 260.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 141.8354, 272.8531\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 36----\n", + "TRAIN original: 252.0828, 283.3568\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 244.8731, 297.2635\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 260.0936, 291.0151\n", + "VAL norm: 0.25614887766195515, 1.0\n", + "-----Dim 37----\n", + "TRAIN original: 7.344271, 227.1725\n", + "TRAIN norm: 0.0, 0.9999999999999999\n", + "TEST original: 9.536016, 442.4635\n", + "TEST norm: 0.009970261826564596, 1.0\n", + "VAL original: 9.766726, 18.99513\n", + "VAL norm: 0.011019763071466127, 0.05299983106355279\n", + "-----Dim 38----\n", + "TRAIN original: 0.001281723, 
1.757754\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 1.753653\n", + "TEST norm: 0.0, 0.9976652065314663\n", + "VAL original: 1.217636, 1.752628\n", + "VAL norm: 0.6924984202298343, 0.9970816504950736\n", + "-----Dim 39----\n", + "TRAIN original: 0.000640451, 1.361983\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 1.358781\n", + "TEST norm: 0.0, 0.9976479101440325\n", + "VAL original: 0.8169591, 1.360318\n", + "VAL norm: 0.599642352763926, 0.9987769426576558\n", + "-----Dim 40----\n", + "TRAIN original: 0.001664373, 0.7636911\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 0.7415421\n", + "TEST norm: 0.0, 0.9709340903472012\n", + "VAL original: 0.3337708, 0.7391096\n", + "VAL norm: 0.4358199197388519, 0.96774194509322\n", + "-----Dim 41----\n", + "TRAIN original: 0.0, 0.3170099\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 0.0, 0.3116933\n", + "TEST norm: 0.0, 0.9832289149329405\n", + "VAL original: 0.0, 0.3109887\n", + "VAL norm: 0.0, 0.9810062714129748\n", + "-----Dim 42----\n", + "TRAIN original: 1.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 43----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 1.0\n", + "TEST norm: 0.0, 0.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 44----\n", + "TRAIN original: 8.891951, 264.6437\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 9.468726, 254.3418\n", + "TEST norm: 0.0022552142937642267, 0.9597191415492528\n", + "VAL original: 163.8843, 251.522\n", + "VAL norm: 0.6060265456874743, 0.9486936060014978\n", + "-----Dim 45----\n", + "TRAIN original: 0.0, 3.668343\n", + "TRAIN norm: 0.0, 0.9999999999999999\n", + "TEST original: 0.0, 1.826162\n", + "TEST norm: 0.0, 0.4978165891248446\n", + "VAL original: 0.0, 1.970333\n", + "VAL norm: 0.0, 0.5371179848776408\n", + "-----Dim 46----\n", + "TRAIN original: 3.108177, 
200.6376\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 3.14022, 191.986\n", + "TEST norm: 0.00016221887105902222, 0.9562009554394335\n", + "VAL original: 134.4687, 190.3679\n", + "VAL norm: 0.6650174996967415, 0.9480092644223439\n", + "-----Dim 47----\n", + "TRAIN original: 0.0, 1.746131\n", + "TRAIN norm: 0.0, 0.9999999999999999\n", + "TEST original: 0.0, 1.80271\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 0.0, 1.742287\n", + "VAL norm: 0.0, 0.9977985615054081\n", + "-----Dim 48----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 1.0\n", + "TEST norm: 0.0, 0.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n", + "-----Dim 49----\n", + "TRAIN original: 1.0, 2.0\n", + "TRAIN norm: 0.0, 1.0\n", + "TEST original: 1.0, 2.0\n", + "TEST norm: 0.0, 1.0\n", + "VAL original: 1.0, 2.0\n", + "VAL norm: 0.0, 1.0\n", + "-----Dim 50----\n", + "TRAIN original: 1.0, 1.0\n", + "TRAIN norm: 0.0, 0.0\n", + "TEST original: 1.0, 1.0\n", + "TEST norm: 0.0, 0.0\n", + "VAL original: 1.0, 1.0\n", + "VAL norm: 0.0, 0.0\n" + ] + } + ], + "source": [ + "for i in range(51):\n", + " print(f'-----Dim {i}----')\n", + " print(f'TRAIN original: {train_values[:, i].min()}, {train_values[:, i].max()}')\n", + " print(f'TRAIN norm: {train_norm[:, i].min()}, {train_norm[:, i].max()}')\n", + " print(f'TEST original: {test_values[:, i].min()}, {test_values[:, i].max()}')\n", + " print(f'TEST norm: {test_norm[:, i].min()}, {test_norm[:, i].max()}')\n", + " print(f'VAL original: {validation_values[:, i].min()}, {validation_values[:, i].max()}')\n", + " print(f'VAL norm: {validation_norm[:, i].min()}, {validation_norm[:, i].max()}')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0 1.0\n", + "0.0 1.0\n", + "0.0 1.0\n", + "(495000, 51) (404928, 51) (44991, 51)\n" + ] + } + ], + "source": [ + "print(train_norm.min(), train_norm.max())\n", 
+ "print(test_norm.min(), test_norm.max())\n", + "print(validation_norm.min(), validation_norm.max())\n", + "print(train_norm.shape, test_norm.shape, validation_norm.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [], + "source": [ + "test_labels_clipped = test_labels_clipped.to_numpy()\n", + "test_labels_reshaped = np.zeros_like(test_norm)\n", + "\n", + "for idx in range(0, len(test_labels_clipped)):\n", + " if test_labels_clipped[idx]:\n", + " # labels_reshaped.shape[1] == 51 aka num_feats\n", + " test_labels_reshaped[idx][0:test_labels_reshaped.shape[1]] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "validation_labels_reshaped = np.zeros_like(validation_norm)\n", + "\n", + "for idx in range(0, len(validation_labels)):\n", + " if validation_labels[idx]:\n", + " # labels_reshaped.shape[1] == 51 aka num_feats\n", + " validation_labels_reshaped[idx][0:validation_labels_reshaped.shape[1]] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(404928, 51) (44991, 51)\n" + ] + } + ], + "source": [ + "print(test_labels_reshaped.shape, validation_labels_reshaped.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "np.save('labels.npy', test_labels_reshaped)\n", + "np.save('labels_validation.npy', validation_labels_reshaped)\n", + "np.save('train.npy', train_norm)\n", + "np.save('test.npy', test_norm)\n", + "np.save('validation.npy', validation_norm)" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [], + "source": [ + "load_test = np.load('../processed/SWAT_big/validation.npy')" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "labels = 
np.load('../processed/SWAT_big/labels.npy')\n", + "labels_validation = np.load('../processed/SWAT_big/labels_validation.npy')\n", + "labels = (np.sum(labels, axis=1) >= 1) + 0\n", + "labels_validation = (np.sum(labels_validation, axis=1) >= 1) + 0\n", + "# labels.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "12.140185233342002" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "((labels.sum() + labels_validation.sum()) / (labels.shape[0] + labels_validation.shape[0])) * 100" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1501" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.12386152599968389" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(labels.sum() / labels.shape[0]) " + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(44991, 51)" + ] + }, + "execution_count": 113, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "load_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py3.9test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": 
{ + "interpreter": { + "hash": "27efe6010b91a164a18a011cd71b7afbe2f076e5b83b7f8099f414d97e11e710" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/subject1-4/AdaDiff/data/split_val_wadi.ipynb b/subject1-4/AdaDiff/data/split_val_wadi.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..fa867f5c1987856a390c7260bb8bda626256f7e2 --- /dev/null +++ b/subject1-4/AdaDiff/data/split_val_wadi.ipynb @@ -0,0 +1,1188 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "train = pd.read_csv('WADI_14days_new.csv')\n", + "test = pd.read_csv('WADI_attackdataLABLE.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(784571, 130)\n", + "(172803, 131)\n" + ] + } + ], + "source": [ + "print(train.shape)\n", + "print(test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(784571, 126)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train = train.dropna(axis=1, how='all')\n", + "train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(172803, 127)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test = test.dropna(axis=1, how='all')\n", + "test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Row', 'Date', 'Time', '1_AIT_001_PV', '1_AIT_002_PV', '1_AIT_003_PV',\n", + " '1_AIT_004_PV', '1_AIT_005_PV', '1_FIT_001_PV', 
'1_LS_001_AL',\n", + " ...\n", + " '3_MV_001_STATUS', '3_MV_002_STATUS', '3_MV_003_STATUS',\n", + " '3_P_001_STATUS', '3_P_002_STATUS', '3_P_003_STATUS', '3_P_004_STATUS',\n", + " 'LEAK_DIFF_PRESSURE', 'PLANT_START_STOP_LOG',\n", + " 'TOTAL_CONS_REQUIRED_FLOW'],\n", + " dtype='object', length=126)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['Row ', 'Date ', 'Time', '1_AIT_001_PV', '1_AIT_002_PV', '1_AIT_003_PV',\n", + " '1_AIT_004_PV', '1_AIT_005_PV', '1_FIT_001_PV', '1_LS_001_AL',\n", + " ...\n", + " '3_MV_002_STATUS', '3_MV_003_STATUS', '3_P_001_STATUS',\n", + " '3_P_002_STATUS', '3_P_003_STATUS', '3_P_004_STATUS',\n", + " 'LEAK_DIFF_PRESSURE', 'PLANT_START_STOP_LOG',\n", + " 'TOTAL_CONS_REQUIRED_FLOW', 'Attack LABLE (1:No Attack, -1:Attack)'],\n", + " dtype='object', length=127)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " 1 162826\n", + "-1 9977\n", + "Name: Attack LABLE (1:No Attack, -1:Attack), dtype: int64" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['Attack LABLE (1:No Attack, -1:Attack)'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "# trim column names\n", + "train = train.rename(columns=lambda x: x.strip())\n", + "test = test.rename(columns=lambda x: x.strip())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 162826\n", + "1 9977\n", + "Name: Attack LABLE (1:No Attack, -1:Attack), dtype: int64" + 
] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test['Attack LABLE (1:No Attack, -1:Attack)'] = [1 if x == -1 else 0 for x in test['Attack LABLE (1:No Attack, -1:Attack)']]\n", + "test['Attack LABLE (1:No Attack, -1:Attack)'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(172803,)\n", + "9977\n" + ] + } + ], + "source": [ + "test_labels = test['Attack LABLE (1:No Attack, -1:Attack)'].values\n", + "print(test_labels.shape)\n", + "print(np.sum(test_labels))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "def search_ratio(test_labels, val_len):\n", + " val = test_labels[:val_len]\n", + " test = test_labels[val_len:]\n", + " test_ratio = (np.sum(test) /test.shape[0]) * 100\n", + " val_ratio = (np.sum(val) / val.shape[0]) * 100\n", + " print(f'val ratio: {val_ratio}')\n", + " print(f'test ratio: {test_ratio}')\n", + " print('----')\n", + " return val_ratio, test_ratio" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "val ratio: 8.686342592592593\n", + "test ratio: 5.449997749529008\n", + "----\n", + "val ratio: 5.790895061728396\n", + "test ratio: 5.770579304616599\n", + "----\n", + "val ratio: 4.343171296296297\n", + "test ratio: 6.131232684475886\n", + "----\n", + "val ratio: 2.895447530864198\n", + "test ratio: 7.007101345039392\n", + "----\n" + ] + } + ], + "source": [ + "vr, tr = search_ratio(test_labels=test_labels, val_len=int(0.1 * test.shape[0]))\n", + "vr, tr = search_ratio(test_labels=test_labels, val_len=int(0.15 * test.shape[0]))\n", + "vr, tr = search_ratio(test_labels=test_labels, val_len=int(0.2 * test.shape[0]))\n", + "vr, tr = search_ratio(test_labels=test_labels, val_len=int(0.3 * 
test.shape[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "25920" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val_len = int(0.15 * test.shape[0])\n", + "val_len" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(25920, 127)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation = test[:val_len]\n", + "validation.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(146883, 127)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_clipped = test[val_len:]\n", + "test_clipped.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 24419\n", + "1 1501\n", + "Name: Attack LABLE (1:No Attack, -1:Attack), dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation['Attack LABLE (1:No Attack, -1:Attack)'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(146883,)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_labels_clipped = test_labels[val_len:]\n", + "test_labels_clipped.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(25920,)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation_labels = test_labels[:val_len]\n", + 
"validation_labels.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.plot(test_dropped.to_numpy()[:, 7])\n", + "#plt.fill_between(np.arange(test_labels.shape[0]), test_labels, color='red', alpha=0.3, linestyle='dashed', linewidth=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(172803,)" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_labels.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "test_dropped = test_clipped.drop(['Row', 'Date', 'Time', 'Attack LABLE (1:No Attack, -1:Attack)'], axis=1)\n", + "validation_dropped = validation.drop(['Row', 'Date', 'Time', 'Attack LABLE (1:No Attack, -1:Attack)'], axis=1)\n", + "train_dropped = train.drop(['Row', 'Date', 'Time'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "missing_test = test_dropped.isna().sum()\n", + "missing_val = validation_dropped.isna().sum()\n", + "missing_train = train_dropped.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(784571, 123)" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_dropped.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "for x in missing_train:\n", + " print(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "train_dropped = train_dropped.interpolate(method='linear', limit_direction='forward', axis=0)\n", + "train_dropped = train_dropped.fillna(0)\n", + "test_dropped = test_dropped.interpolate(method='linear', 
limit_direction='forward', axis=0)\n", + "test_dropped = test_dropped.fillna(0)\n", + "validation_dropped = validation_dropped.interpolate(method='linear', limit_direction='forward', axis=0)\n", + "validation_dropped = validation_dropped.fillna(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
1_AIT_001_PV1_AIT_002_PV1_AIT_003_PV1_AIT_004_PV1_AIT_005_PV1_FIT_001_PV1_LS_001_AL1_LS_002_AL1_LT_001_PV1_MV_001_STATUS...3_MV_001_STATUS3_MV_002_STATUS3_MV_003_STATUS3_P_001_STATUS3_P_002_STATUS3_P_003_STATUS3_P_004_STATUSLEAK_DIFF_PRESSUREPLANT_START_STOP_LOGTOTAL_CONS_REQUIRED_FLOW
120963179.5570.70346311.8693457.2620.2991320.001140.00.052.29841.0...1.01.01.01.01.01.01.063.88291.00.83
120964179.5570.70346311.8693457.2620.2991320.001140.00.052.29841.0...1.01.01.01.01.01.01.063.88291.00.83
120965179.5570.70346311.8693457.2620.2991320.001140.00.052.29841.0...1.01.01.01.01.01.01.063.88291.00.83
120966179.5570.70346311.8693457.2620.2991320.001140.00.052.29841.0...1.01.01.01.01.01.01.063.88291.00.83
120967179.5570.70346311.8693457.2620.2991320.001140.00.052.29841.0...1.01.01.01.01.01.01.063.88291.00.83
\n", + "

5 rows × 123 columns

\n", + "
" + ], + "text/plain": [ + " 1_AIT_001_PV 1_AIT_002_PV 1_AIT_003_PV 1_AIT_004_PV 1_AIT_005_PV \\\n", + "120963 179.557 0.703463 11.8693 457.262 0.299132 \n", + "120964 179.557 0.703463 11.8693 457.262 0.299132 \n", + "120965 179.557 0.703463 11.8693 457.262 0.299132 \n", + "120966 179.557 0.703463 11.8693 457.262 0.299132 \n", + "120967 179.557 0.703463 11.8693 457.262 0.299132 \n", + "\n", + " 1_FIT_001_PV 1_LS_001_AL 1_LS_002_AL 1_LT_001_PV 1_MV_001_STATUS \\\n", + "120963 0.00114 0.0 0.0 52.2984 1.0 \n", + "120964 0.00114 0.0 0.0 52.2984 1.0 \n", + "120965 0.00114 0.0 0.0 52.2984 1.0 \n", + "120966 0.00114 0.0 0.0 52.2984 1.0 \n", + "120967 0.00114 0.0 0.0 52.2984 1.0 \n", + "\n", + " ... 3_MV_001_STATUS 3_MV_002_STATUS 3_MV_003_STATUS \\\n", + "120963 ... 1.0 1.0 1.0 \n", + "120964 ... 1.0 1.0 1.0 \n", + "120965 ... 1.0 1.0 1.0 \n", + "120966 ... 1.0 1.0 1.0 \n", + "120967 ... 1.0 1.0 1.0 \n", + "\n", + " 3_P_001_STATUS 3_P_002_STATUS 3_P_003_STATUS 3_P_004_STATUS \\\n", + "120963 1.0 1.0 1.0 1.0 \n", + "120964 1.0 1.0 1.0 1.0 \n", + "120965 1.0 1.0 1.0 1.0 \n", + "120966 1.0 1.0 1.0 1.0 \n", + "120967 1.0 1.0 1.0 1.0 \n", + "\n", + " LEAK_DIFF_PRESSURE PLANT_START_STOP_LOG TOTAL_CONS_REQUIRED_FLOW \n", + "120963 63.8829 1.0 0.83 \n", + "120964 63.8829 1.0 0.83 \n", + "120965 63.8829 1.0 0.83 \n", + "120966 63.8829 1.0 0.83 \n", + "120967 63.8829 1.0 0.83 \n", + "\n", + "[5 rows x 123 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation_dropped.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# plt.rcParams['figure.figsize'] = 30, 2\n", + "plt.plot(test_dropped.to_numpy()[:, 7])" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.preprocessing import MinMaxScaler\n", + "\n", + "def scale_data(train, test, validation):\n", + " scaler = MinMaxScaler(feature_range=(0, 1), clip=True).fit(train)\n", + "\n", + " train_scaled = scaler.transform(train)\n", + " test_scaled = scaler.transform(test)\n", + " validation_scaled = scaler.transform(validation)\n", + "\n", + " # train_scaled = scaler.fit_transform(train)\n", + " # validation_scaled = scaler.fit_transform(validation)\n", + " # test_scaled = scaler.fit_transform(test)\n", + "\n", + " return train_scaled, test_scaled, validation_scaled" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [], + "source": [ + "train_values = train_dropped.values\n", + "test_values = test_dropped.values\n", + "validation_values = validation_dropped.values" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "train_norm, test_norm, val_norm = scale_data(train_values, test_values, validation_values)" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(784571, 123)" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_norm.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(146883, 123)" + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "labels_reshaped = np.zeros_like(test_values)\n", + "labels_reshaped.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(25920,)" + ] + }, + "execution_count": 112, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation_labels.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "for idx in range(0, len(test_labels_clipped)):\n", + " if test_labels_clipped[idx]:\n", + " # labels_reshaped.shape[1] == 123 aka num_feats\n", + " labels_reshaped[idx][0:labels_reshaped.shape[1]] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(25920, 123)" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "validation_labels_reshaped = np.zeros_like(validation_values)\n", + "validation_labels_reshaped.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "for idx in range(0, len(validation_labels)):\n", + " if validation_labels[idx]:\n", + " # validation_labels_reshaped.shape[1] == 123 aka num_feats\n", + " validation_labels_reshaped[idx][0:validation_labels_reshaped.shape[1]] = 1" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [], + "source": [ + "np.save('labels.npy', labels_reshaped)\n", + "np.save('labels_validation.npy', validation_labels_reshaped)\n", + "np.save('train.npy', train_norm)\n", + "np.save('test.npy', test_norm)\n", + "np.save('validation.npy', val_norm)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "val = np.load('../processed/WADI/validation.npy')\n", + "test = np.load('../processed/WADI/test.npy')\n", + "validation_labels = np.load('../processed/WADI/labels_validation.npy')\n", + "test_labels = np.load('../processed/WADI/labels.npy')" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "(146883, 123)" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "validation_labels = (np.sum(validation_labels, axis=1) >= 1) + 0\n", + "test_labels = (np.sum(test_labels, axis=1) >= 1) + 0" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.rcParams['figure.figsize'] = 30, 2" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(test[:, 3])\n", + "plt.fill_between(np.arange(test_labels.shape[0]), test_labels, color='red', alpha=0.3, linestyle='dashed', linewidth=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(val[:, 3])\n", + "plt.fill_between(np.arange(validation_labels.shape[0]), validation_labels, color='red', alpha=0.3, linestyle='dashed', linewidth=0.3)" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3139" + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.sum(validation_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6838" + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.sum(test_labels_clipped)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "test = np.load('../processed/WADI/test.npy')\n", + "train = np.load('../processed/WADI/train.npy')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(146883, 123)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(784571, 123)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.rcParams['figure.figsize'] = 30, 2\n", + "plt.plot(test[:, 7])\n", + "#plt.fill_between(np.arange(validation_labels.shape[0]), validation_labels, color='red', alpha=0.3, linestyle='dashed', linewidth=0.3)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py3.9test", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "27efe6010b91a164a18a011cd71b7afbe2f076e5b83b7f8099f414d97e11e710" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/subject1-4/AdaDiff/denoising_diffusion_pytorch_1d.py b/subject1-4/AdaDiff/denoising_diffusion_pytorch_1d.py new file mode 100644 index 0000000000000000000000000000000000000000..d31054523cf339a01f871596f4fa5958c97974ae --- /dev/null +++ b/subject1-4/AdaDiff/denoising_diffusion_pytorch_1d.py @@ -0,0 +1,829 @@ +import math +from pathlib import Path +from random import random +from functools import partial +from collections import namedtuple +from multiprocessing import cpu_count + +import torch +from torch import nn, einsum, Tensor +import torch.nn.functional as F +from torch.cuda.amp import autocast +from torch.optim import Adam +from torch.utils.data import Dataset, DataLoader + +from einops import rearrange, reduce +from einops.layers.torch import Rearrange + +from accelerate import Accelerator +from ema_pytorch import EMA + +from tqdm.auto import tqdm + + +# constants + +ModelPrediction = namedtuple('ModelPrediction', ['pred_noise', 'pred_x_start']) + +# helpers functions + +def exists(x): + return x is not None + +def default(val, d): + if exists(val): + return 
val + return d() if callable(d) else d + +def identity(t, *args, **kwargs): + return t + +def cycle(dl): + while True: + for data in dl: + yield data + +def has_int_squareroot(num): + return (math.sqrt(num) ** 2) == num + +def num_to_groups(num, divisor): + groups = num // divisor + remainder = num % divisor + arr = [divisor] * groups + if remainder > 0: + arr.append(remainder) + return arr + +def convert_image_to_fn(img_type, image): + if image.mode != img_type: + return image.convert(img_type) + return image + +# normalization functions + +def normalize_to_neg_one_to_one(img): + return img * 2 - 1 + +def unnormalize_to_zero_to_one(t): + return (t + 1) * 0.5 + +# data + +class Dataset1D(Dataset): + def __init__(self, tensor: Tensor): + super().__init__() + self.tensor = tensor.clone() + + def __len__(self): + return len(self.tensor) + + def __getitem__(self, idx): + return self.tensor[idx].clone() + +# small helper modules + +class Residual(nn.Module): + def __init__(self, fn): + super().__init__() + self.fn = fn + + def forward(self, x, *args, **kwargs): + return self.fn(x, *args, **kwargs) + x + +def Upsample(dim, dim_out = None): + return nn.Sequential( + nn.Upsample(scale_factor = 2, mode = 'nearest'), + nn.Conv1d(dim, default(dim_out, dim), 3, padding = 1) + ) + +def Downsample(dim, dim_out = None): + return nn.Conv1d(dim, default(dim_out, dim), 4, 2, 1) + +class RMSNorm(nn.Module): + def __init__(self, dim): + super().__init__() + self.g = nn.Parameter(torch.ones(1, dim, 1)) + + def forward(self, x): + return F.normalize(x, dim = 1) * self.g * (x.shape[1] ** 0.5) + +class PreNorm(nn.Module): + def __init__(self, dim, fn): + super().__init__() + self.fn = fn + self.norm = RMSNorm(dim) + + def forward(self, x): + x = self.norm(x) + return self.fn(x) + +# sinusoidal positional embeds + +class SinusoidalPosEmb(nn.Module): + def __init__(self, dim, theta = 10000): + super().__init__() + self.dim = dim + self.theta = theta + + def forward(self, x): + device = 
x.device + half_dim = self.dim // 2 + emb = math.log(self.theta) / (half_dim - 1) + emb = torch.exp(torch.arange(half_dim, device=device) * -emb) + emb = x[:, None] * emb[None, :] + emb = torch.cat((emb.sin(), emb.cos()), dim=-1) + return emb + +class RandomOrLearnedSinusoidalPosEmb(nn.Module): + """ following @crowsonkb 's lead with random (learned optional) sinusoidal pos emb """ + """ https://github.com/crowsonkb/v-diffusion-jax/blob/master/diffusion/models/danbooru_128.py#L8 """ + + def __init__(self, dim, is_random = False): + super().__init__() + assert (dim % 2) == 0 + half_dim = dim // 2 + self.weights = nn.Parameter(torch.randn(half_dim), requires_grad = not is_random) + + def forward(self, x): + x = rearrange(x, 'b -> b 1') + freqs = x * rearrange(self.weights, 'd -> 1 d') * 2 * math.pi + fouriered = torch.cat((freqs.sin(), freqs.cos()), dim = -1) + fouriered = torch.cat((x, fouriered), dim = -1) + return fouriered + +# building block modules + +class Block(nn.Module): + def __init__(self, dim, dim_out, groups = 8): + super().__init__() + self.proj = nn.Conv1d(dim, dim_out, 3, padding = 1) + self.norm = nn.GroupNorm(groups, dim_out) + self.act = nn.SiLU() + + def forward(self, x, scale_shift = None): + x = self.proj(x) + x = self.norm(x) + + if exists(scale_shift): + scale, shift = scale_shift + x = x * (scale + 1) + shift + + x = self.act(x) + return x + +class ResnetBlock(nn.Module): + def __init__(self, dim, dim_out, *, time_emb_dim = None, groups = 8): + super().__init__() + self.mlp = nn.Sequential( + nn.SiLU(), + nn.Linear(time_emb_dim, dim_out * 2) + ) if exists(time_emb_dim) else None + + self.block1 = Block(dim, dim_out, groups = groups) + self.block2 = Block(dim_out, dim_out, groups = groups) + self.res_conv = nn.Conv1d(dim, dim_out, 1) if dim != dim_out else nn.Identity() + + def forward(self, x, time_emb = None): + + scale_shift = None + if exists(self.mlp) and exists(time_emb): + time_emb = self.mlp(time_emb) + time_emb = rearrange(time_emb, 
'b c -> b c 1') + scale_shift = time_emb.chunk(2, dim = 1) + + h = self.block1(x, scale_shift = scale_shift) + + h = self.block2(h) + + return h + self.res_conv(x) + +class LinearAttention(nn.Module): + def __init__(self, dim, heads = 4, dim_head = 32): + super().__init__() + self.scale = dim_head ** -0.5 + self.heads = heads + hidden_dim = dim_head * heads + self.to_qkv = nn.Conv1d(dim, hidden_dim * 3, 1, bias = False) + + self.to_out = nn.Sequential( + nn.Conv1d(hidden_dim, dim, 1), + RMSNorm(dim) + ) + + def forward(self, x): + b, c, n = x.shape + qkv = self.to_qkv(x).chunk(3, dim = 1) + q, k, v = map(lambda t: rearrange(t, 'b (h c) n -> b h c n', h = self.heads), qkv) + + q = q.softmax(dim = -2) + k = k.softmax(dim = -1) + + q = q * self.scale + + context = torch.einsum('b h d n, b h e n -> b h d e', k, v) + + out = torch.einsum('b h d e, b h d n -> b h e n', context, q) + out = rearrange(out, 'b h c n -> b (h c) n', h = self.heads) + return self.to_out(out) + +class Attention(nn.Module): + def __init__(self, dim, heads = 4, dim_head = 32): + super().__init__() + self.scale = dim_head ** -0.5 + self.heads = heads + hidden_dim = dim_head * heads + + self.to_qkv = nn.Conv1d(dim, hidden_dim * 3, 1, bias = False) + self.to_out = nn.Conv1d(hidden_dim, dim, 1) + + def forward(self, x): + b, c, n = x.shape + qkv = self.to_qkv(x).chunk(3, dim = 1) + q, k, v = map(lambda t: rearrange(t, 'b (h c) n -> b h c n', h = self.heads), qkv) + + q = q * self.scale + + sim = einsum('b h d i, b h d j -> b h i j', q, k) + attn = sim.softmax(dim = -1) + out = einsum('b h i j, b h d j -> b h i d', attn, v) + + out = rearrange(out, 'b h n d -> b (h d) n') + return self.to_out(out) + +# model + +class Unet1D(nn.Module): + def __init__( + self, + dim, + init_dim = None, + out_dim = None, + dim_mults=(1, 2, 4, 8), + channels = 3, + self_condition = False, + resnet_block_groups = 8, + learned_variance = False, + learned_sinusoidal_cond = False, + random_fourier_features = False, + 
learned_sinusoidal_dim = 16, + sinusoidal_pos_emb_theta = 10000, + attn_dim_head = 32, + attn_heads = 4 + ): + super().__init__() + + # determine dimensions + + self.channels = channels + self.self_condition = self_condition + input_channels = channels * (2 if self_condition else 1) + + init_dim = default(init_dim, dim) + self.init_conv = nn.Conv1d(input_channels, init_dim, 7, padding = 3) + + dims = [init_dim, *map(lambda m: dim * m, dim_mults)] + in_out = list(zip(dims[:-1], dims[1:])) + + block_klass = partial(ResnetBlock, groups = resnet_block_groups) + + # time embeddings + + time_dim = dim * 4 + + self.random_or_learned_sinusoidal_cond = learned_sinusoidal_cond or random_fourier_features + + if self.random_or_learned_sinusoidal_cond: + sinu_pos_emb = RandomOrLearnedSinusoidalPosEmb(learned_sinusoidal_dim, random_fourier_features) + fourier_dim = learned_sinusoidal_dim + 1 + else: + sinu_pos_emb = SinusoidalPosEmb(dim, theta = sinusoidal_pos_emb_theta) + fourier_dim = dim + + self.time_mlp = nn.Sequential( + sinu_pos_emb, + nn.Linear(fourier_dim, time_dim), + nn.GELU(), + nn.Linear(time_dim, time_dim) + ) + + # layers + + self.downs = nn.ModuleList([]) + self.ups = nn.ModuleList([]) + num_resolutions = len(in_out) + + for ind, (dim_in, dim_out) in enumerate(in_out): + is_last = ind >= (num_resolutions - 1) + + self.downs.append(nn.ModuleList([ + block_klass(dim_in, dim_in, time_emb_dim = time_dim), + block_klass(dim_in, dim_in, time_emb_dim = time_dim), + Residual(PreNorm(dim_in, LinearAttention(dim_in))), + Downsample(dim_in, dim_out) if not is_last else nn.Conv1d(dim_in, dim_out, 3, padding = 1) + ])) + + mid_dim = dims[-1] + self.mid_block1 = block_klass(mid_dim, mid_dim, time_emb_dim = time_dim) + self.mid_attn = Residual(PreNorm(mid_dim, Attention(mid_dim, dim_head = attn_dim_head, heads = attn_heads))) + self.mid_block2 = block_klass(mid_dim, mid_dim, time_emb_dim = time_dim) + + for ind, (dim_in, dim_out) in enumerate(reversed(in_out)): + is_last = ind 
== (len(in_out) - 1) + + self.ups.append(nn.ModuleList([ + block_klass(dim_out + dim_in, dim_out, time_emb_dim = time_dim), + block_klass(dim_out + dim_in, dim_out, time_emb_dim = time_dim), + Residual(PreNorm(dim_out, LinearAttention(dim_out))), + Upsample(dim_out, dim_in) if not is_last else nn.Conv1d(dim_out, dim_in, 3, padding = 1) + ])) + + default_out_dim = channels * (1 if not learned_variance else 2) + self.out_dim = default(out_dim, default_out_dim) + + self.final_res_block = block_klass(dim * 2, dim, time_emb_dim = time_dim) + self.final_conv = nn.Conv1d(dim, self.out_dim, 1) + + def forward(self, x, time, x_self_cond = None): + if self.self_condition: + x_self_cond = default(x_self_cond, lambda: torch.zeros_like(x)) + x = torch.cat((x_self_cond, x), dim = 1) + + x = self.init_conv(x) + r = x.clone() + + t = self.time_mlp(time) + + h = [] + + for block1, block2, attn, downsample in self.downs: + x = block1(x, t) + h.append(x) + + x = block2(x, t) + x = attn(x) + h.append(x) + + x = downsample(x) + + x = self.mid_block1(x, t) + x = self.mid_attn(x) + x = self.mid_block2(x, t) + + for block1, block2, attn, upsample in self.ups: + x = torch.cat((x, h.pop()), dim = 1) + x = block1(x, t) + + x = torch.cat((x, h.pop()), dim = 1) + x = block2(x, t) + x = attn(x) + + x = upsample(x) + + x = torch.cat((x, r), dim = 1) + + x = self.final_res_block(x, t) + return self.final_conv(x) + +# gaussian diffusion trainer class + +def extract(a, t, x_shape): + b, *_ = t.shape + out = a.gather(-1, t) + return out.reshape(b, *((1,) * (len(x_shape) - 1))) + +def linear_beta_schedule(timesteps): + scale = 1000 / timesteps + beta_start = scale * 0.0001 + beta_end = scale * 0.02 + return torch.linspace(beta_start, beta_end, timesteps, dtype = torch.float64) + +def cosine_beta_schedule(timesteps, s = 0.008): + """ + cosine schedule + as proposed in https://openreview.net/forum?id=-NEXDKk8gZ + """ + steps = timesteps + 1 + x = torch.linspace(0, timesteps, steps, dtype = 
torch.float64) + alphas_cumprod = torch.cos(((x / timesteps) + s) / (1 + s) * math.pi * 0.5) ** 2 + alphas_cumprod = alphas_cumprod / alphas_cumprod[0] + betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1]) + return torch.clip(betas, 0, 0.999) + +class GaussianDiffusion1D(nn.Module): + def __init__( + self, + model, + *, + seq_length, + timesteps = 1000, + sampling_timesteps = None, + objective = 'pred_noise', + beta_schedule = 'cosine', + ddim_sampling_eta = 0., + auto_normalize = True + ): + super().__init__() + self.model = model + self.channels = self.model.channels + self.self_condition = self.model.self_condition + + self.seq_length = seq_length + + self.objective = objective + + assert objective in {'pred_noise', 'pred_x0', 'pred_v'}, 'objective must be either pred_noise (predict noise) or pred_x0 (predict image start) or pred_v (predict v [v-parameterization as defined in appendix D of progressive distillation paper, used in imagen-video successfully])' + + if beta_schedule == 'linear': + betas = linear_beta_schedule(timesteps) + elif beta_schedule == 'cosine': + betas = cosine_beta_schedule(timesteps) + else: + raise ValueError(f'unknown beta schedule {beta_schedule}') + + alphas = 1. - betas + alphas_cumprod = torch.cumprod(alphas, dim=0) + alphas_cumprod_prev = F.pad(alphas_cumprod[:-1], (1, 0), value = 1.) 
+ + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + + # sampling related parameters + + self.sampling_timesteps = default(sampling_timesteps, timesteps) # default num sampling timesteps to number of timesteps at training + + assert self.sampling_timesteps <= timesteps + self.is_ddim_sampling = self.sampling_timesteps < timesteps + self.ddim_sampling_eta = ddim_sampling_eta + + # helper function to register buffer from float64 to float32 + + register_buffer = lambda name, val: self.register_buffer(name, val.to(torch.float32)) + + register_buffer('betas', betas) + register_buffer('alphas_cumprod', alphas_cumprod) + register_buffer('alphas_cumprod_prev', alphas_cumprod_prev) + + # calculations for diffusion q(x_t | x_{t-1}) and others + + register_buffer('sqrt_alphas_cumprod', torch.sqrt(alphas_cumprod)) + register_buffer('sqrt_one_minus_alphas_cumprod', torch.sqrt(1. - alphas_cumprod)) + register_buffer('log_one_minus_alphas_cumprod', torch.log(1. - alphas_cumprod)) + register_buffer('sqrt_recip_alphas_cumprod', torch.sqrt(1. / alphas_cumprod)) + register_buffer('sqrt_recipm1_alphas_cumprod', torch.sqrt(1. / alphas_cumprod - 1)) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + + posterior_variance = betas * (1. - alphas_cumprod_prev) / (1. - alphas_cumprod) + + # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) + + register_buffer('posterior_variance', posterior_variance) + + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + + register_buffer('posterior_log_variance_clipped', torch.log(posterior_variance.clamp(min =1e-20))) + register_buffer('posterior_mean_coef1', betas * torch.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod)) + register_buffer('posterior_mean_coef2', (1. - alphas_cumprod_prev) * torch.sqrt(alphas) / (1. 
- alphas_cumprod)) + + # calculate loss weight + + snr = alphas_cumprod / (1 - alphas_cumprod) + + if objective == 'pred_noise': + loss_weight = torch.ones_like(snr) + elif objective == 'pred_x0': + loss_weight = snr + elif objective == 'pred_v': + loss_weight = snr / (snr + 1) + + register_buffer('loss_weight', loss_weight) + + # whether to autonormalize + + self.normalize = normalize_to_neg_one_to_one if auto_normalize else identity + self.unnormalize = unnormalize_to_zero_to_one if auto_normalize else identity + + def predict_start_from_noise(self, x_t, t, noise): + return ( + extract(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - + extract(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * noise + ) + + def predict_noise_from_start(self, x_t, t, x0): + return ( + (extract(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t - x0) / \ + extract(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) + ) + + def predict_v(self, x_start, t, noise): + return ( + extract(self.sqrt_alphas_cumprod, t, x_start.shape) * noise - + extract(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * x_start + ) + + def predict_start_from_v(self, x_t, t, v): + return ( + extract(self.sqrt_alphas_cumprod, t, x_t.shape) * x_t - + extract(self.sqrt_one_minus_alphas_cumprod, t, x_t.shape) * v + ) + + def q_posterior(self, x_start, x_t, t): + posterior_mean = ( + extract(self.posterior_mean_coef1, t, x_t.shape) * x_start + + extract(self.posterior_mean_coef2, t, x_t.shape) * x_t + ) + posterior_variance = extract(self.posterior_variance, t, x_t.shape) + posterior_log_variance_clipped = extract(self.posterior_log_variance_clipped, t, x_t.shape) + return posterior_mean, posterior_variance, posterior_log_variance_clipped + + def model_predictions(self, x, t, x_self_cond = None, clip_x_start = False, rederive_pred_noise = False): + model_output = self.model(x, t, x_self_cond) + maybe_clip = partial(torch.clamp, min = -1., max = 1.) 
if clip_x_start else identity + + if self.objective == 'pred_noise': + pred_noise = model_output + x_start = self.predict_start_from_noise(x, t, pred_noise) + x_start = maybe_clip(x_start) + + if clip_x_start and rederive_pred_noise: + pred_noise = self.predict_noise_from_start(x, t, x_start) + + elif self.objective == 'pred_x0': + x_start = model_output + x_start = maybe_clip(x_start) + pred_noise = self.predict_noise_from_start(x, t, x_start) + + elif self.objective == 'pred_v': + v = model_output + x_start = self.predict_start_from_v(x, t, v) + x_start = maybe_clip(x_start) + pred_noise = self.predict_noise_from_start(x, t, x_start) + + return ModelPrediction(pred_noise, x_start) + + def p_mean_variance(self, x, t, x_self_cond = None, clip_denoised = True): + preds = self.model_predictions(x, t, x_self_cond) + x_start = preds.pred_x_start + + if clip_denoised: + x_start.clamp_(-1., 1.) + + model_mean, posterior_variance, posterior_log_variance = self.q_posterior(x_start = x_start, x_t = x, t = t) + return model_mean, posterior_variance, posterior_log_variance, x_start + + @torch.no_grad() + def p_sample(self, x, t: int, x_self_cond = None, clip_denoised = True): + b, *_, device = *x.shape, x.device + batched_times = torch.full((b,), t, device = x.device, dtype = torch.long) + model_mean, _, model_log_variance, x_start = self.p_mean_variance(x = x, t = batched_times, x_self_cond = x_self_cond, clip_denoised = clip_denoised) + noise = torch.randn_like(x) if t > 0 else 0. 
# no noise if t == 0 + pred_img = model_mean + (0.5 * model_log_variance).exp() * noise + return pred_img, x_start + + @torch.no_grad() + def p_sample_loop(self, shape): + batch, device = shape[0], self.betas.device + + img = torch.randn(shape, device=device) + + x_start = None + + for t in tqdm(reversed(range(0, self.num_timesteps)), desc = 'sampling loop time step', total = self.num_timesteps): + self_cond = x_start if self.self_condition else None + img, x_start = self.p_sample(img, t, self_cond) + + img = self.unnormalize(img) + return img + + @torch.no_grad() + def ddim_sample(self, shape, clip_denoised = True): + batch, device, total_timesteps, sampling_timesteps, eta, objective = shape[0], self.betas.device, self.num_timesteps, self.sampling_timesteps, self.ddim_sampling_eta, self.objective + + times = torch.linspace(-1, total_timesteps - 1, steps=sampling_timesteps + 1) # [-1, 0, 1, 2, ..., T-1] when sampling_timesteps == total_timesteps + times = list(reversed(times.int().tolist())) + time_pairs = list(zip(times[:-1], times[1:])) # [(T-1, T-2), (T-2, T-3), ..., (1, 0), (0, -1)] + + img = torch.randn(shape, device = device) + + x_start = None + + for time, time_next in tqdm(time_pairs, desc = 'sampling loop time step'): + time_cond = torch.full((batch,), time, device=device, dtype=torch.long) + self_cond = x_start if self.self_condition else None + pred_noise, x_start, *_ = self.model_predictions(img, time_cond, self_cond, clip_x_start = clip_denoised) + + if time_next < 0: + img = x_start + continue + + alpha = self.alphas_cumprod[time] + alpha_next = self.alphas_cumprod[time_next] + + sigma = eta * ((1 - alpha / alpha_next) * (1 - alpha_next) / (1 - alpha)).sqrt() + c = (1 - alpha_next - sigma ** 2).sqrt() + + noise = torch.randn_like(img) + + img = x_start * alpha_next.sqrt() + \ + c * pred_noise + \ + sigma * noise + + img = self.unnormalize(img) + return img + + @torch.no_grad() + def sample(self, batch_size = 16): + seq_length, channels = 
self.seq_length, self.channels + sample_fn = self.p_sample_loop if not self.is_ddim_sampling else self.ddim_sample + return sample_fn((batch_size, channels, seq_length)) + + @torch.no_grad() + def interpolate(self, x1, x2, t = None, lam = 0.5): + b, *_, device = *x1.shape, x1.device + t = default(t, self.num_timesteps - 1) + + assert x1.shape == x2.shape + + t_batched = torch.full((b,), t, device = device) + xt1, xt2 = map(lambda x: self.q_sample(x, t = t_batched), (x1, x2)) + + img = (1 - lam) * xt1 + lam * xt2 + + x_start = None + + for i in tqdm(reversed(range(0, t)), desc = 'interpolation sample time step', total = t): + self_cond = x_start if self.self_condition else None + img, x_start = self.p_sample(img, i, self_cond) + + return img + + @autocast(enabled = False) + def q_sample(self, x_start, t, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + + return ( + extract(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start + + extract(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise + ) + + def p_losses(self, x_start, t, noise = None): + b, c, n = x_start.shape + noise = default(noise, lambda: torch.randn_like(x_start)) + + # noise sample + + x = self.q_sample(x_start = x_start, t = t, noise = noise) + + # if doing self-conditioning, 50% of the time, predict x_start from current set of times + # and condition with unet with that + # this technique will slow down training by 25%, but seems to lower FID significantly + + x_self_cond = None + if self.self_condition and random() < 0.5: + with torch.no_grad(): + x_self_cond = self.model_predictions(x, t).pred_x_start + x_self_cond.detach_() + + # predict and take gradient step + + model_out = self.model(x, t, x_self_cond) + + if self.objective == 'pred_noise': + target = noise + elif self.objective == 'pred_x0': + target = x_start + elif self.objective == 'pred_v': + v = self.predict_v(x_start, t, noise) + target = v + else: + raise ValueError(f'unknown objective 
{self.objective}') + + loss = F.mse_loss(model_out, target, reduction = 'none') + loss = reduce(loss, 'b ... -> b', 'mean') + + loss = loss * extract(self.loss_weight, t, loss.shape) + return loss.mean() + + def forward(self, img, *args, **kwargs): + b, c, n, device, seq_length, = *img.shape, img.device, self.seq_length + assert n == seq_length, f'seq length must be {seq_length}' + t = torch.randint(0, self.num_timesteps, (b,), device=device).long() + + img = self.normalize(img) + return self.p_losses(img, t, *args, **kwargs) + +# trainer class + +class Trainer1D(object): + def __init__( + self, + diffusion_model: GaussianDiffusion1D, + dataset: Dataset, + *, + train_batch_size = 16, + gradient_accumulate_every = 1, + train_lr = 1e-4, + train_num_steps = 100000, + ema_update_every = 10, + ema_decay = 0.995, + adam_betas = (0.9, 0.99), + save_and_sample_every = 1000, + num_samples = 25, + results_folder = './results', + amp = False, + mixed_precision_type = 'fp16', + split_batches = True, + max_grad_norm = 1. 
+ ): + super().__init__() + + # accelerator + + self.accelerator = Accelerator( + split_batches = split_batches, + mixed_precision = mixed_precision_type if amp else 'no' + ) + + # model + + self.model = diffusion_model + self.channels = diffusion_model.channels + + # sampling and training hyperparameters + + assert has_int_squareroot(num_samples), 'number of samples must have an integer square root' + self.num_samples = num_samples + self.save_and_sample_every = save_and_sample_every + + self.batch_size = train_batch_size + self.gradient_accumulate_every = gradient_accumulate_every + self.max_grad_norm = max_grad_norm + + self.train_num_steps = train_num_steps + + # dataset and dataloader + + dl = DataLoader(dataset, batch_size = train_batch_size, shuffle = True, pin_memory = True, num_workers = cpu_count()) + + dl = self.accelerator.prepare(dl) + self.dl = cycle(dl) + + # optimizer + + self.opt = Adam(diffusion_model.parameters(), lr = train_lr, betas = adam_betas) + + # for logging results in a folder periodically + + if self.accelerator.is_main_process: + self.ema = EMA(diffusion_model, beta = ema_decay, update_every = ema_update_every) + self.ema.to(self.device) + + self.results_folder = Path(results_folder) + self.results_folder.mkdir(exist_ok = True) + + # step counter state + + self.step = 0 + + # prepare model, dataloader, optimizer with accelerator + + self.model, self.opt = self.accelerator.prepare(self.model, self.opt) + + @property + def device(self): + return self.accelerator.device + + def save(self, milestone): + if not self.accelerator.is_local_main_process: + return + + data = { + 'step': self.step, + 'model': self.accelerator.get_state_dict(self.model), + 'opt': self.opt.state_dict(), + 'ema': self.ema.state_dict(), + 'scaler': self.accelerator.scaler.state_dict() if exists(self.accelerator.scaler) else None, + 'version': __version__ + } + + torch.save(data, str(self.results_folder / f'model-{milestone}.pt')) + + def load(self, milestone): + 
accelerator = self.accelerator + device = accelerator.device + + data = torch.load(str(self.results_folder / f'model-{milestone}.pt'), map_location=device) + + model = self.accelerator.unwrap_model(self.model) + model.load_state_dict(data['model']) + + self.step = data['step'] + self.opt.load_state_dict(data['opt']) + if self.accelerator.is_main_process: + self.ema.load_state_dict(data["ema"]) + + if 'version' in data: + print(f"loading from version {data['version']}") + + if exists(self.accelerator.scaler) and exists(data['scaler']): + self.accelerator.scaler.load_state_dict(data['scaler']) diff --git a/subject1-4/AdaDiff/diffusion_module2.py b/subject1-4/AdaDiff/diffusion_module2.py new file mode 100644 index 0000000000000000000000000000000000000000..61599470a6ee72a515373d2b39e4c6dcf9c563f1 --- /dev/null +++ b/subject1-4/AdaDiff/diffusion_module2.py @@ -0,0 +1,159 @@ +import torch +import torch.nn.functional as F +from tqdm import tqdm + +def cosine_beta_schedule(timesteps, s=0.008): + """ + cosine schedule as proposed in https://arxiv.org/abs/2102.09672 + """ + steps = timesteps + 1 + x = torch.linspace(0, timesteps, steps) + alphas_cumprod = torch.cos(((x / timesteps) + s) / (1 + s) * torch.pi * 0.5) ** 2 + alphas_cumprod = alphas_cumprod / alphas_cumprod[0] + betas = 1 - (alphas_cumprod[1:] / alphas_cumprod[:-1]) + return torch.clip(betas, 0.0001, 0.9999) + +def linear_beta_schedule(timesteps): + beta_start = 0.0001 + beta_end = 0.02 + return torch.linspace(beta_start, beta_end, timesteps) + +def quadratic_beta_schedule(timesteps): + beta_start = 0.0001 + beta_end = 0.02 + return torch.linspace(beta_start**0.5, beta_end**0.5, timesteps) ** 2 + +def sigmoid_beta_schedule(timesteps): + beta_start = 0.0001 + beta_end = 0.02 + betas = torch.linspace(-6, 6, timesteps) + return torch.sigmoid(betas) * (beta_end - beta_start) + beta_start + +timesteps = 300 + +# define beta schedule +betas = linear_beta_schedule(timesteps=timesteps) + +# define alphas +alphas = 1. 
- betas +alphas_cumprod = torch.cumprod(alphas, axis=0) +alphas_cumprod_prev = F.pad(alphas_cumprod[:-1], (1, 0), value=1.0) +sqrt_recip_alphas = torch.sqrt(1.0 / alphas) + +# calculations for diffusion q(x_t | x_{t-1}) and others +sqrt_alphas_cumprod = torch.sqrt(alphas_cumprod) +sqrt_one_minus_alphas_cumprod = torch.sqrt(1. - alphas_cumprod) + +# calculations for posterior q(x_{t-1} | x_t, x_0) +posterior_variance = betas * (1. - alphas_cumprod_prev) / (1. - alphas_cumprod) + +def extract(a, t, x_shape): + batch_size = t.shape[0] + out = a.gather(-1, t.cpu()) + return out.reshape(batch_size, *((1,) * (len(x_shape) - 1))).to(t.device) + + +# forward diffusion (using the nice property) +def q_sample(x_start, t, noise=None): + if noise is None: + noise = torch.randn_like(x_start) + + sqrt_alphas_cumprod_t = extract(sqrt_alphas_cumprod, t, x_start.shape) + sqrt_one_minus_alphas_cumprod_t = extract( + sqrt_one_minus_alphas_cumprod, t, x_start.shape + ) + + return sqrt_alphas_cumprod_t * x_start + sqrt_one_minus_alphas_cumprod_t * noise + +def p_losses(denoise_model, x_start, t, noise=None, loss_type="l2"): + if noise is None: + noise = torch.randn_like(x_start) + + x_noisy = q_sample(x_start=x_start, t=t, noise=noise) + predicted_noise = denoise_model(x_noisy, t) + #if train: + if loss_type == 'l1': + loss = F.l1_loss(noise, predicted_noise) + elif loss_type == 'l2': + loss = F.mse_loss(noise, predicted_noise) + elif loss_type == "huber": + loss = F.smooth_l1_loss(noise, predicted_noise) + else: + raise NotImplementedError() + # else: + # x_recon = (x_noisy - extract(sqrt_one_minus_alphas_cumprod, t, x_noisy.shape) * predicted_noise) / extract(sqrt_alphas_cumprod, t, x_noisy.shape) + # loss = F.mse_loss(predicted_noise, noise, reduction='none') + return loss + + +##### SAMPLING ####### + +@torch.no_grad() +def p_sample(model, x, t, t_index): + betas_t = extract(betas, t, x.shape) + sqrt_one_minus_alphas_cumprod_t = extract( + sqrt_one_minus_alphas_cumprod, t, x.shape 
+ ) + sqrt_recip_alphas_t = extract(sqrt_recip_alphas, t, x.shape) + + # Equation 11 in the paper + # Use our model (noise predictor) to predict the mean + model_mean = sqrt_recip_alphas_t * ( + x - betas_t * model(x, t) / sqrt_one_minus_alphas_cumprod_t + ) + + if t_index == 0: + return model_mean + else: + posterior_variance_t = extract(posterior_variance, t, x.shape) + noise = torch.randn_like(x) + # Algorithm 2 line 4: + return model_mean + torch.sqrt(posterior_variance_t) * noise + +# Algorithm 2 (including returning all images) +@torch.no_grad() +def p_sample_loop(model, shape, x_start, denoise_steps): + #device = next(model.parameters()).device + #timesteps = 200 + timesteps = denoise_steps + device = 'cuda' + + b = shape[0] + # start from pure noise (for each example in the batch) + #img = torch.randn(shape, device=device) + noise = torch.randn_like(x_start) + img = q_sample(x_start=x_start, t=torch.full((b,), timesteps, device=device, dtype=torch.long), noise=noise) + # imgs = [] + + for i in tqdm(reversed(range(0, timesteps)), desc='sampling loop time step', total=timesteps): + img = p_sample(model, img, torch.full((b,), i, device=device, dtype=torch.long), i) + #imgs.append(img.cpu().numpy()) + return img + +@torch.no_grad() +def sample(model, shape, x_start, denoise_steps): + return p_sample_loop(model, shape=shape, x_start=x_start, denoise_steps=denoise_steps) +@torch.no_grad() +def sample(model, shape, x_start, denoise_steps, Tstart, Tend): + return p_sample_loop_withStartAndEnd(model, shape, x_start, denoise_steps, Tstart, Tend) + +@torch.no_grad() +def p_sample_loop_withStartAndEnd(model, shape, x_start, denoise_steps, Tstart, Tend): + #device = next(model.parameters()).device + #timesteps = 200 + timesteps = denoise_steps + device = 'cuda' + + b = shape[0] + # start from pure noise (for each example in the batch) + #img = torch.randn(shape, device=device) + if Tstart == 0: + noise = torch.randn_like(x_start) + img = q_sample(x_start=x_start, 
t=torch.full((b,), timesteps, device=device, dtype=torch.long), noise=noise) + else: + img = x_start + + for i in tqdm(reversed(range(timesteps - Tend, timesteps - Tstart)), desc='sampling loop time step', total=Tend - Tstart): + img = p_sample(model, img, torch.full((b,), i, device=device, dtype=torch.long), i) + #imgs.append(img.cpu().numpy()) + return img \ No newline at end of file diff --git a/subject1-4/AdaDiff/huigui.ipynb b/subject1-4/AdaDiff/huigui.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..3164575c129b427bb5d2245e421b2d905a2afdfd --- /dev/null +++ b/subject1-4/AdaDiff/huigui.ipynb @@ -0,0 +1,234 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "# 假设你的数据是一个CSV文件,可以通过read_csv加载\n", + "# 如果数据不是CSV格式,可以根据实际情况使用其他read_函数\n", + "data = pd.read_csv('../alpha=0.1.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
prf1time
00.1500.771.47
10.1450.741.45
20.1350.701.43
30.1300.681.40
40.1250.631.39
\n", + "
" + ], + "text/plain": [ + " p r f1 time\n", + "0 0.1 50 0.77 1.47\n", + "1 0.1 45 0.74 1.45\n", + "2 0.1 35 0.70 1.43\n", + "3 0.1 30 0.68 1.40\n", + "4 0.1 25 0.63 1.39" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "特征 X:\n", + " p r\n", + "0 0.1 50\n", + "1 0.1 45\n", + "2 0.1 35\n", + "3 0.1 30\n", + "4 0.1 25\n", + "目标 y:\n", + " 0 0.77\n", + "1 0.74\n", + "2 0.70\n", + "3 0.68\n", + "4 0.63\n", + "Name: f1, dtype: float64\n" + ] + } + ], + "source": [ + "X = data.iloc[:, :-2] # 选择除最后一列之外的所有列作为特征\n", + "f1 = data.iloc[:, -2] # 选择最后一列作为目标变量\n", + "time = data.iloc[:,-1]\n", + "# 打印 X 和 y,确保提取正确\n", + "print(\"特征 X:\\n\", X.head())\n", + "print(\"目标 y:\\n\", f1.head())" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "回归系数: [-0.92813953 0.00513953]\n", + "截距: 0.6066511627906976\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn.linear_model import LinearRegression\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# 生成一些示例数据\n", + "# np.random.seed(0)\n", + "# X = 2 * np.random.rand(100, 2) # 有两个特征\n", + "# y = 4 + 3 * X[:, 0] + 2 * X[:, 1] + np.random.randn(100)\n", + "\n", + "# 创建线性回归模型\n", + "model = LinearRegression()\n", + "\n", + "# 将数据拟合到模型中\n", + "model.fit(X, f1)\n", + "\n", + "# 打印回归系数和截距\n", + "print(\"回归系数:\", model.coef_)\n", + "print(\"截距:\", model.intercept_)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "回归系数: [-0.53418605 0.00318605]\n", + "截距: 1.36353488372093\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn.linear_model import 
LinearRegression\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# 生成一些示例数据\n", + "# np.random.seed(0)\n", + "# X = 2 * np.random.rand(100, 2) # 有两个特征\n", + "# y = 4 + 3 * X[:, 0] + 2 * X[:, 1] + np.random.randn(100)\n", + "\n", + "# 创建线性回归模型\n", + "model = LinearRegression()\n", + "\n", + "# 将数据拟合到模型中\n", + "model.fit(X, time)\n", + "\n", + "# 打印回归系数和截距\n", + "print(\"回归系数:\", model.coef_)\n", + "print(\"截距:\", model.intercept_)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "diffusionA", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/subject1-4/AdaDiff/models2.py b/subject1-4/AdaDiff/models2.py new file mode 100644 index 0000000000000000000000000000000000000000..506a11b0fa0ea756d72b69a222bfe54cbf9256d1 --- /dev/null +++ b/subject1-4/AdaDiff/models2.py @@ -0,0 +1,273 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim +import pickle +from torch.nn import TransformerEncoder, TransformerEncoderLayer, TransformerDecoderLayer +from torch.nn import TransformerDecoder +from src.dlutils import * +from unet2 import Unet +from scipy import stats +from diffusion_module2 import p_losses, sample +from denoising_diffusion_pytorch_1d import Unet1D +device = 'cuda' + + +class Autoencoder_Diffusion(nn.Module): + def __init__(self, feats, lr, window_size, batch_size, p1, p2): + super().__init__() + self.name = 'AutoencoderDiffusion' + self.lr = lr + self.batch = batch_size + self.n_feats = feats + self.n_window = window_size + self.p1 = p1 + self.p2 = p2 + self.bottleneck_s1 = int(p1 * self.n_feats * self.n_window) + self.bottleneck_s2 = int(p2 * self.n_feats * self.n_window) + self.bottleneck = 
32 + + self.encoder = torch.nn.Sequential( + torch.nn.Linear(self.n_feats * self.n_window, self.bottleneck_s1), + torch.nn.ReLU(), + torch.nn.Linear(self.bottleneck_s1, self.bottleneck_s2), + ) + + #self.encoder = torch.nn.Linear(self.n_feats * self.n_window, self.bottleneck) + + self.decoder = torch.nn.Sequential( + torch.nn.Linear(self.bottleneck_s2, self.bottleneck_s1), + torch.nn.ReLU(), + torch.nn.Linear(self.bottleneck_s1, self.n_feats * self.n_window), + ) + + self.activation_fn = torch.nn.ReLU() + + #self.decoder = torch.nn.Linear(self.bottleneck, self.n_feats * self.n_window) + + + def forward(self, x): + encoded = self.encoder(x) + decoded = self.decoder(encoded) + return decoded + + +class ConditionalDiffusionTrainingNetwork(nn.Module): + + def __init__(self,nr_feats, window_size, batch_size, noise_steps, denoise_steps, train=True): + super().__init__() + self.dim = min(nr_feats, 16) + self.nr_feats = nr_feats + self.window_size = window_size + self.batch_size = batch_size + + self.training = train + self.timesteps = noise_steps + self.denoise_steps = denoise_steps + + # self.denoise_fn = Unet(dim=self.dim, channels=1, resnet_block_groups=1, init_size=torch.Size([self.dim, self.window_size, self.nr_feats])) + self.denoise_fn = model = Unet1D( + dim = 64, + dim_mults = (1, 2, 4, 8), + channels = nr_feats + ) + + def forward(self, x, Tstart = None, Tend = None): + + diffusion_loss = None + x_recon = None + if Tstart == None: + Tstart = 0 + if Tend == None: + Tend = self.denoise_steps + + # x = x.reshape(-1, 1, self.window_size, self.nr_feats) + # print(self.nr_feats) + x = x.transpose(2,1) + # print(np.shape(x)) + if self.training: + t = torch.randint(0, self.timesteps, (x.shape[0],), device=device).long() + diffusion_loss = p_losses(self.denoise_fn, x, t) + else: + # x_recon = sample(self.denoise_fn, shape=(x.shape[0], 1, self.window_size, self.nr_feats), x_start=x, denoise_steps=self.denoise_steps) + x_recon = sample(self.denoise_fn, shape=(x.shape[0], 
1, self.window_size, self.nr_feats), x_start=x, denoise_steps=self.denoise_steps, Tstart = Tstart, Tend = Tend) + return diffusion_loss, x_recon + + + +class TransformerBasic(nn.Module): + def __init__(self, feats): + super().__init__() + self.name = 'TransformerBasic' + self.lr = 0.1 + self.batch = 128 + self.n_feats = feats + self.n_window = 10 + + self.lin = nn.Linear(1, feats) + self.out_lin = nn.Linear(feats, 1) + self.pos_encoder = PositionalEncoding(feats, 0.1, feats*self.n_window) + encoder_layers = TransformerEncoderLayer(d_model=feats, nhead=feats, dim_feedforward=16, dropout=0.1) + self.transformer_encoder = TransformerEncoder(encoder_layers, 1) + decoder_layers = TransformerDecoderLayer(d_model=feats, nhead=feats, dim_feedforward=16, dropout=0.1) + self.transformer_decoder = TransformerDecoder(decoder_layers, 1) + self.fcn = nn.Sigmoid() + + def forward(self, src, tgt): + # bs x (ws x features) x features + src = src * math.sqrt(self.n_feats) + src = self.lin(src.unsqueeze(2)) + src = self.pos_encoder(src) + memory = self.transformer_encoder(src) + + tgt = tgt * math.sqrt(self.n_feats) + tgt = self.lin(tgt.unsqueeze(2)) + tgt = self.pos_encoder(tgt) + x = self.transformer_decoder(tgt, memory) + x = self.out_lin(x) + x = self.fcn(x) + return x + +class TransformerBasicv2(nn.Module): + def __init__(self, feats, lr, window_size): + super(TransformerBasicv2, self).__init__() + self.name = 'TransformerBasicv2' + self.lr = lr + self.batch = 128 + self.n_feats = feats + self.n_window = window_size + self.scale = 16 + self.linear_layer = nn.Linear(feats, self.scale*feats) + self.output_layer = nn.Linear(self.scale*feats, feats) + self.pos_encoder = PositionalEncoding(self.scale*feats, 0.1, self.n_window, batch_first=True) + encoder_layers = TransformerEncoderLayer(d_model=feats*self.scale, nhead=feats, batch_first=True, dim_feedforward=256, dropout=0.1) + self.transformer_encoder = TransformerEncoder(encoder_layers, 1) + decoder_layers = 
TransformerDecoderLayer(d_model=feats*self.scale, nhead=feats, batch_first=True, dim_feedforward=256, dropout=0.1) + self.transformer_decoder = TransformerDecoder(decoder_layers, 1) + self.fcn = nn.Sigmoid() + + def forward(self, src, tgt): + src = src * math.sqrt(self.n_feats) + src = self.linear_layer(src) + src = self.pos_encoder(src) + memory = self.transformer_encoder(src) + + tgt = tgt * math.sqrt(self.n_feats) + tgt = self.linear_layer(tgt) + + x = self.transformer_decoder(tgt, memory) + x = self.output_layer(x) + x = self.fcn(x) + return x + +class TransformerBasicv2Scaling(nn.Module): + def __init__(self, feats, lr, window_size): + super(TransformerBasicv2Scaling, self).__init__() + self.name = 'TransformerBasicv2Scaling' + self.lr = lr + self.batch = 128 + self.n_feats = feats + self.n_window = window_size + self.scale = 16 + self.linear_layer = nn.Linear(feats, self.scale*feats) + self.output_layer = nn.Linear(self.scale*feats, feats) + self.pos_encoder = PositionalEncoding(self.scale*feats, 0.1, self.n_window, batch_first=True) + encoder_layers = TransformerEncoderLayer(d_model=feats*self.scale, nhead=feats, batch_first=True, dim_feedforward=256, dropout=0.1) + self.transformer_encoder = TransformerEncoder(encoder_layers, 1) + decoder_layers = TransformerDecoderLayer(d_model=feats*self.scale, nhead=feats, batch_first=True, dim_feedforward=256, dropout=0.1) + self.transformer_decoder = TransformerDecoder(decoder_layers, 1) + self.fcn = nn.Sigmoid() + + def forward(self, src, tgt): + model_dim = self.scale * self.n_feats + + src = self.linear_layer(src) + src = src * math.sqrt(model_dim) + src = self.pos_encoder(src) + memory = self.transformer_encoder(src) + + tgt = self.linear_layer(tgt) + tgt = tgt * math.sqrt(model_dim) + + x = self.transformer_decoder(tgt, memory) + x = self.output_layer(x) + x = self.fcn(x) + return x + + + +class TransformerBasicBottleneck(nn.Module): + def __init__(self, feats, lr, window_size): + super(TransformerBasicBottleneck, 
self).__init__() + self.name = 'TransformerBasicBottleneck' + self.lr = lr + self.batch = 16 + self.n_feats = feats + self.n_window = window_size + self.scale = 16 + self.linear_layer = nn.Linear(feats, self.scale*feats) + self.output_layer = nn.Linear(self.scale*feats, feats) + self.pos_encoder = PositionalEncoding(self.scale*feats, 0.1, self.n_window, batch_first=True) + encoder_layers = TransformerEncoderLayer(d_model=feats*self.scale, nhead=feats, batch_first=True, dim_feedforward=256, dropout=0.1) + self.transformer_encoder = TransformerEncoder(encoder_layers, 1) + decoder_layers = TransformerDecoderLayer(d_model=feats*self.scale, nhead=feats, batch_first=True, dim_feedforward=256, dropout=0.1) + self.transformer_decoder = TransformerDecoder(decoder_layers, 1) + self.fcn = nn.Sigmoid() + + def forward(self, src, tgt): + src = src * math.sqrt(self.n_feats) + src = self.linear_layer(src) + src = self.pos_encoder(src) + # batch x t x d + memory = self.transformer_encoder(src) + # batch x 1 x d + z = torch.mean(memory, dim=1, keepdim=True) + + + tgt = tgt * math.sqrt(self.n_feats) + tgt = self.linear_layer(tgt) + + x = self.transformer_decoder(tgt, z) + x = self.output_layer(x) + x = self.fcn(x) + return x + +class TransformerBasicBottleneckScaling(nn.Module): + def __init__(self, feats, lr, window_size, batch_size): + super(TransformerBasicBottleneckScaling, self).__init__() + self.name = 'TransformerBasicBottleneckScaling' + self.lr = lr + self.batch = batch_size + self.n_feats = feats + self.n_window = window_size + self.scale = 16 + self.linear_layer = nn.Linear(feats, self.scale*feats) + self.output_layer = nn.Linear(self.scale*feats, feats) + self.pos_encoder = PositionalEncoding(self.scale*feats, 0.1, self.n_window, batch_first=True) + encoder_layers = TransformerEncoderLayer(d_model=feats*self.scale, nhead=feats, batch_first=True, dim_feedforward=256, dropout=0.1) + self.transformer_encoder = TransformerEncoder(encoder_layers, 1) + decoder_layers = 
class Server:
    """TCP server that denoises data fragments sent by edge clients.

    One request is served per connection, using this exchange:

    1. the client sends the payload length as a decimal string;
    2. the server answers ``go``;
    3. the client sends a pickled dict with the keys ``"ratio"`` and ``"data"``;
    4. the server sends the length of the pickled result, waits for any
       acknowledgement, then sends the pickled result.
    """

    # Maximum number of bytes requested from the socket per recv() call.
    _CHUNK_SIZE = 1024

    def __init__(self, address, model):
        """Store the listening ``address`` and the denoising ``model``."""
        self.address = address
        self.clients = []
        self.real_anomalies = set()
        self.model = model

    def start_server(self):
        """Listen on ``self.address`` forever, one thread per connection."""
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
            server_socket.bind(self.address)
            server_socket.listen()

            print(f"Server listening on {self.address}")

            while True:
                client_socket, client_address = server_socket.accept()
                client_thread = threading.Thread(
                    target=self.handle_client,
                    args=(client_socket, client_address),
                )
                client_thread.start()
                self.clients.append(client_thread)

    @staticmethod
    def _recv_exact(sock, length, chunk_size=1024):
        """Read exactly ``length`` bytes from ``sock``.

        Never reads past ``length``, so bytes belonging to a later message
        stay in the socket buffer.

        Raises:
            ConnectionError: if the peer closes the connection early.
        """
        buffer = bytearray()
        while len(buffer) < length:
            chunk = sock.recv(min(chunk_size, length - len(buffer)))
            if not chunk:
                # recv() returning b'' means the peer closed the socket;
                # without this check the loop would spin forever.
                raise ConnectionError(
                    f"connection closed after {len(buffer)} of {length} bytes"
                )
            buffer += chunk
        return bytes(buffer)

    def handle_client(self, client_socket, client_address):
        """Serve one denoising request and always close the socket."""
        print(f"Accepted connection from {client_address}")
        try:
            length = int(client_socket.recv(self._CHUNK_SIZE).decode())
            print("收到长度",length)
            client_socket.sendall('go'.encode())

            serialized_data = self._recv_exact(client_socket, length, self._CHUNK_SIZE)
            # SECURITY: pickle.loads executes arbitrary code embedded in the
            # payload. Only expose this server to trusted clients, or switch
            # to a safe serialisation format.
            deserialized_data = pickle.loads(serialized_data)
            ratio = deserialized_data["ratio"]
            data_fragment = deserialized_data["data"]
            print(f"Received ratio: {ratio}")

            # Run the remaining denoising steps on the received fragment.
            data = self.denoising(data_fragment, ratio)

            serialized_data = pickle.dumps(data)
            client_socket.sendall(str(len(serialized_data)).encode())
            point = client_socket.recv(self._CHUNK_SIZE)
            print("收到信号", point.decode())
            print(np.shape(data))
            print(len(serialized_data))
            # sendall already loops until every byte is written.
            client_socket.sendall(serialized_data)
        finally:
            client_socket.close()
            print(f"Connection from {client_address} closed")

    def denoising(self, data, ratio):
        """Denoise ``data`` starting at the step selected by ``ratio``.

        The model is called as ``model(data, int(denoise_steps * ratio),
        denoise_steps)``; element ``[1]`` of its result is returned with
        axes 1 and 2 swapped.
        """
        # Tensor.to() is not in-place: the result must be re-bound, otherwise
        # the tensor stays on its original device.
        data = data.to(device)
        steps = self.model.denoise_steps
        return self.model(data, int(steps * ratio), steps)[1].transpose(2, 1)
test_loader, validation_loader, labels, validation_labels = train_diffusion_val.load_dataset(dataset_name, part) + + model, diffusion_training_net, diffusion_prediction_net, optimizer, scheduler = \ + train_diffusion_val.load_model(training_mode ,lr, window_size, p1, p2, labels.shape[1], batch_size, noise_steps, denoise_steps) + model, diffusion_training_net = train_diffusion_val.load_from_checkpoint(training_mode, experiment, model, diffusion_training_net) + diffusion_training_net = diffusion_training_net.to(device) + diffusion_prediction_net = diffusion_prediction_net.to(device) + + diffusion_prediction_net.load_state_dict(diffusion_training_net.state_dict()) + diffusion_prediction_net.eval() + # diffusion_training_net.eval() + + # trainD, testD, validationD = next(iter(train_loader)), next(iter(test_loader)), next(iter(validation_loader)) + # testD = train_diffusion_val.convert_to_windows(testD, window_size) + # data_x = torch.tensor(testD, dtype=torch.float32); + # dataset = TensorDataset(data_x, data_x) + # dataloader = DataLoader(dataset, batch_size = batch_size) + + # STime = time.time() + # l1s = [] + # feats=labels.shape[1] + # for window, _ in dataloader: + # window = window.to(device) + # _, x_recon = diffusion_prediction_net(window, 0,50) + # x_recon = x_recon.transpose(2,1) + # l = nn.MSELoss(reduction = 'none') + # loss = l(x_recon, window) + # l1s.append(loss) + # ETime = time.time() + # loss0 = torch.cat(l1s).detach().cpu().numpy() + # loss0 = loss0.reshape(-1,feats) + + # lossFinal = np.mean(np.array(loss0), axis=1) + # labelsFinal = (np.sum(labels, axis=1) >= 1) + 0 + # validation_thresh = 0 + # result, fprs, tprs = train_diffusion_val.evaluate(lossFinal, labelsFinal, validation_thresh=validation_thresh) + # result_roc = result["ROC/AUC"] + # result_f1 = result["f1"] + + # print(result, ETime - STime) + + server = Server(server_address, diffusion_prediction_net) + server.start_server() diff --git 
a/subject1-4/AdaDiff/src/__pycache__/dlutils.cpython-38.pyc b/subject1-4/AdaDiff/src/__pycache__/dlutils.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1f389758846b18d3e10b2ef86123539b5e3e0a18 Binary files /dev/null and b/subject1-4/AdaDiff/src/__pycache__/dlutils.cpython-38.pyc differ diff --git a/subject1-4/AdaDiff/src/__pycache__/eval.cpython-311.pyc b/subject1-4/AdaDiff/src/__pycache__/eval.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..01bea26bc418915413e11ae24808c26a36b7fbc6 Binary files /dev/null and b/subject1-4/AdaDiff/src/__pycache__/eval.cpython-311.pyc differ diff --git a/subject1-4/AdaDiff/src/__pycache__/eval.cpython-38.pyc b/subject1-4/AdaDiff/src/__pycache__/eval.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2c7d0a3cbad08ed5670468c53f707d77fbd02b8e Binary files /dev/null and b/subject1-4/AdaDiff/src/__pycache__/eval.cpython-38.pyc differ diff --git a/subject1-4/AdaDiff/src/__pycache__/my_plotting.cpython-38.pyc b/subject1-4/AdaDiff/src/__pycache__/my_plotting.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53d43d591a64e146329206513692bcb33cc27ef4 Binary files /dev/null and b/subject1-4/AdaDiff/src/__pycache__/my_plotting.cpython-38.pyc differ diff --git a/subject1-4/AdaDiff/src/__pycache__/parser.cpython-311.pyc b/subject1-4/AdaDiff/src/__pycache__/parser.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5bcd450fc7b64e07210ae7ac05001bb24d02b766 Binary files /dev/null and b/subject1-4/AdaDiff/src/__pycache__/parser.cpython-311.pyc differ diff --git a/subject1-4/AdaDiff/src/__pycache__/parser.cpython-38.pyc b/subject1-4/AdaDiff/src/__pycache__/parser.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..390ab0899c8371e34b9d27301ff0cf7678c4bfc0 Binary files /dev/null and b/subject1-4/AdaDiff/src/__pycache__/parser.cpython-38.pyc differ diff 
class PositionalEncoding(nn.Module):
    """Add a fixed sinusoidal position signal to a sequence, then dropout.

    The table stores ``sin(p * w) + cos(p * w)`` in every channel rather
    than interleaving sine and cosine channels. That formula is kept
    unchanged; changing it would alter the inputs seen by
    already-trained weights.

    Args:
        d_model: Channel width of the inputs.
        dropout: Dropout probability applied after adding the encoding.
        max_len: Number of positions in the precomputed table.
        batch_first: ``True`` for (batch, seq, d_model) inputs, ``False``
            for (seq, batch, d_model) inputs.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000, batch_first=False):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        self.batch_first = batch_first

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model).float() * (-math.log(10000.0) / d_model))
        pe += torch.sin(position * div_term)
        pe += torch.cos(position * div_term)
        if self.batch_first:
            pe = pe.unsqueeze(0)                  # (1, max_len, d_model)
        else:
            pe = pe.unsqueeze(0).transpose(0, 1)  # (max_len, 1, d_model)
        self.register_buffer('pe', pe)

    def forward(self, x, pos=0):
        """Return ``dropout(x + pe[pos : pos + seq_len])``.

        Args:
            x: Input laid out according to ``batch_first``.
            pos: Index of the first position to use from the table.
        """
        if self.batch_first:
            # The sequence axis of the buffer is dim 1 here. Slicing dim 0
            # (the size-1 batch axis) returned the whole table for pos == 0
            # and an empty tensor for pos > 0.
            x = x + self.pe[:, pos:pos + x.size(1)]
        else:
            x = x + self.pe[pos:pos + x.size(0)]
        return self.dropout(x)
def evaluate(score, label, validation_thresh=None):
    """Sweep thresholds over ``score`` and summarise PA%K results.

    ``pak_protocol`` is run once per candidate threshold. Each call yields
    the area under the F1-vs-k curve, the best F1 over k, the k that
    achieved it, the matching predictions, and per-k FPR/TPR lists.

    Args:
        score: Per-sample anomaly scores; must support ``.max()`` and
            slicing (presumably a 1-D numpy array — TODO confirm).
        label: Per-sample ground-truth labels.
        validation_thresh: Fixed threshold to report metrics for. Tested by
            truthiness, so ``0`` is treated the same as ``None``.

    Returns:
        With a truthy ``validation_thresh``: a single metrics dict.
        Otherwise: a tuple ``(metrics dict, sorted FPRs, sorted TPRs)``.
        Callers must handle both shapes.
    """
    # Equivalent to score[:len(label)]: trims score when it is longer than
    # label, and is a no-op when it is shorter.
    if len(score) != len(label):
        score = score[:len(score) - (len(score) - len(label))]
    false_pos_rates = []
    true_pos_rates = []
    f1s = []
    max_f1s_k = []
    preds = []
    # Candidate thresholds from 0 up to (not including) the maximum score,
    # in steps of max/50. NOTE(review): a maximum score of 0 gives a zero
    # step, which np.arange rejects.
    thresholds = np.arange(0, score.max(), score.max()/50)

    max_ks = []
    pairs = []


    for thresh in thresholds:
        f1, max_f1_k, k_max, best_preds, fprs, tprs = pak_protocol(score, label, thresh)
        max_f1s_k.append(max_f1_k)
        max_ks.append(k_max)
        preds.append(best_preds)
        false_pos_rates.append(fprs)
        true_pos_rates.append(tprs)
        f1s.append(f1)
        # One (threshold, k) pair per FPR/TPR entry; 101 matches the
        # default max_k=100 of pak_protocol (k = 0..100).
        pairs.extend([(thresh, i) for i in range(101)])

    if validation_thresh:
        # Report metrics for the supplied threshold only.
        f1, max_f1_k, max_k, best_preds, _, _ = pak_protocol(score, label, validation_thresh)
    else:
        # Pick the best swept threshold, by area under F1 and by max F1.
        f1 = max(f1s)
        max_possible_f1 = max(max_f1s_k)
        max_idx = max_f1s_k.index(max_possible_f1)
        max_k = max_ks[max_idx]
        thresh_max_f1 = thresholds[max_idx]
        best_preds = preds[max_idx]
        best_thresh = thresholds[f1s.index(f1)]

    # ROC across thresholds at the single best k (rows become k after the
    # transpose).
    roc_max = auc(np.transpose(false_pos_rates)[max_k], np.transpose(true_pos_rates)[max_k])

    # ROC across every (threshold, k) combination, ordered by FPR.
    false_pos_rates = np.array(false_pos_rates).flatten()
    true_pos_rates = np.array(true_pos_rates).flatten()

    sorted_indexes = np.argsort(false_pos_rates)
    false_pos_rates = false_pos_rates[sorted_indexes]
    true_pos_rates = true_pos_rates[sorted_indexes]
    # Sorted alongside the rates, but not returned.
    pairs = np.array(pairs)[sorted_indexes]
    roc_score = auc(false_pos_rates, true_pos_rates)

    if validation_thresh:
        return {
            'f1': f1,  # f1_k (area under f1) for the validation threshold
            'ROC/AUC': roc_score,  # for all ks and all thresholds obtained on test scores
            'f1_max': max_f1_k,  # best f1 across k values
            'preds': best_preds,  # corresponding to best k
            'k': max_k,  # the k value correlated with the best f1 across k=1,100
            'thresh_max': validation_thresh,
            # NOTE(review): this reports roc_score, not the roc_max computed
            # above — confirm that is intended.
            'roc_max': roc_score,
        }
    else:
        return {
            'f1': f1,
            'ROC/AUC': roc_score,
            'threshold': best_thresh,
            'f1_max': max_possible_f1,
            'roc_max': roc_max,
            'thresh_max': thresh_max_f1,
            'preds': best_preds,
            'k': max_k,
        }, false_pos_rates, true_pos_rates
def _shade_confusion(ax, tp, fp, fn):
    """Shade the true-positive, false-positive and false-negative spans on *ax*."""
    xs = np.arange(len(tp))
    ax.fill_between(xs, tp, color='green', alpha=0.2, linestyle='dashed', linewidth=0.3, label='TP')
    ax.fill_between(xs, fp, color='orange', alpha=0.3, linestyle='dashed', linewidth=0.3, label='FP')
    ax.fill_between(xs, fn, color='blue', alpha=0.2, linestyle='dashed', linewidth=0.3, label='FN')


def _plot_score_panel(ax, score, thresh, n_points):
    """Draw the anomaly score with a dashed horizontal threshold line."""
    ax.plot(score, linewidth=0.2)
    ax.set_xlabel('Timestamp')
    ax.set_ylabel('Score')
    ax.plot([thresh] * n_points, '--', linewidth=0.2, alpha=0.5)


def plotter(model, dataset, ground_truth, anomaly_score, labels, results=None, ae_recon=None, diff_sample=None, preds=None, dim=0, plot_test=True, epoch=0, set='test'):
    """Plot one feature dimension with reconstructions, score and detections.

    The figure is saved as
    ``plots/plots3/<model>[_<dataset>]/dim_<dim>_epoch_<epoch>.jpg``.

    Args:
        model: Model/experiment name, used in the title and output folder.
        dataset: Dataset name; if falsy the folder is named after the model only.
        ground_truth: 2-D array of the original series (time x features).
        anomaly_score: Per-timestamp anomaly score.
        labels: Per-timestamp ground-truth labels (numpy array).
        results: Metrics dict from ``evaluate``. Required despite the
            ``None`` default; must hold ``preds``, ``thresh_max``,
            ``ROC/AUC``, ``f1``, ``roc_max``, ``f1_max`` and ``k``.
        ae_recon: Optional autoencoder reconstruction (time x features).
        diff_sample: Optional diffusion reconstruction (time x features).
        preds: Ignored; predictions are always taken from ``results``.
        dim: Feature dimension to plot.
        plot_test: With both reconstructions present, choose the 4-panel
            test layout (True) or the 3-panel series-only layout (False).
        epoch: Epoch number used in the file name.
        set: Unused.

    Returns:
        The matplotlib figure (already closed after saving).

    Raises:
        ValueError: if ``results`` is not supplied.
    """
    if results is None:
        raise ValueError("plotter requires the `results` dict produced by evaluate()")

    if ground_truth.shape[-1] < 6:
        timestamps = 4000
    else:
        timestamps = len(labels)
        print(timestamps)
    preds = results['preds']
    # Never index past the data actually available: series shorter than the
    # 4000-sample cap used to raise IndexError in the TP/FP/FN loops.
    timestamps = min(timestamps, len(labels), len(preds))

    gt = ground_truth[0:timestamps, dim]
    labels = labels[0:timestamps]
    score = anomaly_score[0:timestamps]
    preds = preds[0:timestamps]
    thresh = results['thresh_max']
    text = f"{model},{dataset}\nROC_k = %.2f, F1_k = %.2f, ROC_max = %.2f, F1_max = %.2f\n best_k = %i, best_th = %.4f"%(results['ROC/AUC'], results['f1'], results['roc_max'], results['f1_max'], results['k'], results['thresh_max'])
    TP = [1 if preds[i] and labels[i] else 0 for i in range(timestamps)]
    FP = [1 if preds[i] and not labels[i] else 0 for i in range(timestamps)]
    FN = [1 if not preds[i] and labels[i] else 0 for i in range(timestamps)]

    # A reconstruction counts as unavailable when it is None or cannot be
    # indexed as (time, feature).
    ae = diff = None
    try:
        ae = ae_recon[0:timestamps, dim]
    except (TypeError, IndexError):
        pass
    try:
        diff = diff_sample[0:timestamps, dim]
    except (TypeError, IndexError):
        pass
    trained_ae = ae is not None
    trained_diff = diff is not None

    if trained_ae and trained_diff:
        if plot_test:
            fig, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)
            ax1.set_title(text, fontsize=7)
            ax1.plot(gt, linewidth=0.2)
            ax1.set_ylim(0, 1)
            ax1.plot(labels, '--', linewidth=0.3, color='red')

            ax2.set_ylim(0, 1)
            ax2.plot(ae, linewidth=0.2)

            ax3.set_ylim()
            ax3.plot(diff, linewidth=0.2, label='diff')

            _shade_confusion(ax2, TP, FP, FN)
            # The legend must be created after the labelled artists exist,
            # otherwise it is empty.
            ax2.legend(loc=(-0.1, -0.2), borderaxespad=0, fontsize='xx-small')

            _plot_score_panel(ax4, score, thresh, timestamps)
        else:
            fig, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True)
            ax2.set_ylim(min(ae), max(ae))
            ax3.set_ylim(min(diff), max(diff))
            ax1.plot(gt, linewidth=0.2, label='True')
            ax2.plot(ae, linewidth=0.2, label='AE')
            ax3.plot(diff, linewidth=0.2, label='diff')
    else:
        fig, (ax1, ax2, ax4) = plt.subplots(3, 1, sharex=True)
        ax1.set_title(text, fontsize=7)
        ax1.plot(gt, linewidth=0.2)
        ax1.set_ylim(0, 1)
        ax1.fill_between(np.arange(labels.shape[0]), labels, color='red', alpha=0.2, linestyle='dashed', linewidth=0.3)

        ax2.set_ylim(0, 1)
        if trained_ae:
            ax2.plot(ae, linewidth=0.2)
        elif trained_diff:
            ax2.plot(diff, linewidth=0.2)
        # With neither reconstruction only the detection shading is drawn.
        _shade_confusion(ax2, TP, FP, FN)
        ax2.legend(loc=(-0.15, -0.2), borderaxespad=0, fontsize='xx-small')

        _plot_score_panel(ax4, score, thresh, timestamps)

    if dataset:
        folder = f'plots/plots3/{model}_{dataset}'
    else:
        folder = f'plots/plots3/{model}'
    os.makedirs(folder, exist_ok=True)
    # Save and close this specific figure, not whichever one is current.
    fig.savefig(f'{folder}/dim_{dim}_epoch_{epoch}.jpg')
    plt.close(fig)
    return fig
def convert_to_windows(data, n_window):
    """Split ``data`` along dim 0 into non-overlapping windows of ``n_window`` rows.

    If the number of rows is not a multiple of ``n_window``, the final
    window is padded by repeating the last row.

    Args:
        data: Tensor whose first dimension is time.
        n_window: Number of rows per window (positive integer).

    Returns:
        A new tensor of shape ``(ceil(T / n_window), n_window, *data.shape[1:])``.
        Empty input gives an empty tensor with zero windows.
    """
    n_rows = data.shape[0]
    if n_rows == 0:
        # There is no last row to repeat, so return zero windows of the
        # right trailing shape instead of raising IndexError.
        return data.new_empty((0, n_window) + tuple(data.shape[1:]))

    missing = -n_rows % n_window
    if missing:
        # Pad once with a broadcast view of the last row rather than
        # re-concatenating the final window once per missing row.
        filler = data[-1:].expand(missing, *data.shape[1:])
        data = torch.cat((data, filler))
    # torch.stack copies, so the result never aliases the caller's tensor.
    return torch.stack(torch.split(data, n_window))
def backprop(epoch, model, diffusion_training_net, diffusion_prediction_net, data, diff_lambda, optimizer, scheduler, training_mode, anomaly_score, k, training = True):
    """Run one training epoch, or one evaluation pass, of the diffusion network.

    Args:
        epoch: Epoch number, used only in the progress messages.
        model: Optional auxiliary model; when truthy its ``batch``
            attribute sets the batch size, otherwise
            ``diffusion_training_net.batch_size`` is used.
        diffusion_training_net: Network being trained; called as
            ``net(window)`` and expected to return ``(loss, _)``.
        diffusion_prediction_net: Inference network; in evaluation mode it
            receives the training weights and must return
            ``(_, reconstruction)``.
        data: Windowed series; assumed to be (n_windows, window_size,
            n_features) as produced by ``convert_to_windows`` — TODO confirm.
        diff_lambda, scheduler, training_mode, anomaly_score, k: Unused
            here; kept for interface compatibility.
        optimizer: Optimiser stepped once per batch in training mode.
        training: ``True`` for a training epoch, ``False`` for evaluation.

    Returns:
        Training: ``(mean_loss, nan, nan)``.
        Evaluation: ``(scores, nan, nan, nan, samples)`` where ``scores`` is
        the element-wise squared reconstruction error and ``samples`` the
        reconstructions, both as numpy arrays. The ``nan`` slots are the
        autoencoder/diffusion means that this code path never computes.
    """
    # as_tensor avoids the copy-construct warning torch.tensor() emits when
    # `data` is already a tensor.
    data_x = torch.as_tensor(data, dtype=torch.float32)
    bs = diffusion_training_net.batch_size if not model else model.batch
    dataloader = DataLoader(TensorDataset(data_x, data_x), batch_size=bs)
    w_size = diffusion_training_net.window_size
    # Take the feature count from the data itself; a module-level `feats`
    # only exists when this file is run as a script.
    n_feats = data_x.shape[-1]
    # Placeholder for means this path never accumulates; np.mean([]) gives
    # the same value but emits a RuntimeWarning on every call.
    nan = float('nan')

    if training:
        diffusion_training_net.train()
        batch_losses = []
        for d, _ in dataloader:
            # Every batch ends up as (batch, window, features). The former
            # model-specific flatten was undone by this reshape, so the
            # global `args` is no longer needed.
            window = d.to(device).reshape(-1, w_size, n_feats)
            loss, _ = diffusion_training_net(window)
            batch_losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        mean_loss = np.mean(batch_losses) if batch_losses else nan
        tqdm.write(f'Epoch {epoch},\tL1 = {mean_loss}')
        tqdm.write(f'Epoch {epoch},\tAE = {nan}')
        tqdm.write(f'Epoch {epoch},\tDiff = {nan}')
        return mean_loss, nan, nan

    with torch.no_grad():
        # Evaluate with the latest training weights.
        diffusion_prediction_net.load_state_dict(diffusion_training_net.state_dict())
        diffusion_prediction_net.eval()
        diffusion_training_net.eval()
        criterion = nn.MSELoss(reduction = 'none')
        scores = []
        samples = []
        for d, _ in dataloader:
            window = d.to(device).reshape(-1, w_size, n_feats)
            _, x_recon = diffusion_prediction_net(window)
            # Swap axes 1 and 2 so the reconstruction lines up with `window`.
            x_recon = x_recon.transpose(2, 1)
            samples.append(x_recon)
            scores.append(criterion(x_recon, window))
        return torch.cat(scores).detach().cpu().numpy(), nan, nan, nan, torch.cat(samples).detach().cpu().numpy()
type=int, + required=False, + default=128, + help="batch_size"), + parser.add_argument('--diff_lambda', + metavar='-t', + type=float, + required=False, + default=0.1, + help="diff_lambda"), + parser.add_argument('--noise_steps', + metavar='-t', + type=int, + required=False, + default=100, + help="noise_steps"), + parser.add_argument('--denoise_steps', + metavar='-t', + type=int, + required=False, + default=10, + help="denoise_steps"), + parser.add_argument('--group', + metavar='-t', + type=str, + required=False, + default='search_smd', + help="wandb group"), + parser.add_argument('--test_only', + metavar='-t', + type=bool, + required=False, + default=False, + help="train new model or not"), + parser.add_argument('--id', + metavar='-t', + type=int, + required=False, + default=0, + help="experiment id for multiple runs"), + args = parser.parse_args() + + config = { + "dataset": args.dataset, + "file": args.file, + "training_mode": args.training, + "model": args.model, + "learning_rate": float(args.lr), + "window_size": int(args.window_size), + "lambda": args.diff_lambda, + "noise_steps":args.noise_steps, + "batch_size": args.batch_size, + } + + #anomaly_scores = ['diffusion'] + anomaly_scores = [args.anomaly_score] + + if args.training == 'diffusion': + experiment = 'diffv4' + elif args.model == 'Autoencoder_Diffusion': + experiment = 'autoencoder_both' + elif args.model == 'TransformerBasicBottleneckScaling': + experiment = 'tr_bn_diffv4' + else: + experiment = 'tr_basic_diffv4' + + experiment += f'_{args.dataset}_{args.noise_steps}-{args.denoise_steps}_{args.diff_lambda}_{args.lr}_{args.batch_size}_{args.window_size}' + + if args.training == 'both': + experiment += f'_{anomaly_scores[0]}_score' + #experiment += f'_{args.id}' + + wandb.init(project="anomaly-mts", config=config, group=args.group) + wandb.run.name = experiment + + dataset_name = args.dataset + part = None if not args.file else args.file + training_mode = 'both' if not args.training else args.training 
+ print(training_mode) + anomaly_score = None if not args.anomaly_score else args.anomaly_score + window_size = int(args.window_size) + synthetic_datasets = ['point_global', 'point_contextual', 'pattern_shapelet', 'pattern_seasonal', 'pattern_trend', 'all_types', 'pattern_trendv2'] + + train_loader, test_loader, validation_loader, labels, validation_labels = load_dataset(dataset_name, part) + model, diffusion_training_net, diffusion_prediction_net, optimizer, scheduler = \ + load_model(training_mode ,args.lr, args.window_size, args.p1, args.p2, labels.shape[1], args.batch_size, args.noise_steps, args.denoise_steps) + if model: + model = model.to(device) + + diffusion_training_net = diffusion_training_net.to(device) + diffusion_prediction_net = diffusion_prediction_net.to(device) + + trainD, testD, validationD = next(iter(train_loader)), next(iter(test_loader)), next(iter(validation_loader)) + trainO, testO, validationO = trainD, testD, validationD + if args.v: + print(f'\ntrainD.shape: {trainD.shape}') + print(f'testD.shape: {testD.shape}') + print(f'validationD.shape: {validationD.shape}') + print(f'labels.shape: {labels.shape}') + + feats=labels.shape[1] + + trainD, testD, validationD = convert_to_windows(trainD, window_size), convert_to_windows(testD, window_size), convert_to_windows(validationD, window_size) + #num_epochs = 500 if args.dataset in synthetic_datasets else 100 + num_epochs = 500 + + # while labels.shape[0]%window_size!=0: + # labels=np.concatenate((labels, np.expand_dims(labels[-1], 0)), axis=0) + + # while validation_labels.shape[0]%window_size!=0: + # validation_labels=np.concatenate((labels, np.expand_dims(labels[-1], 0)), axis=0) + # print('training') + epoch = -1 + + e = epoch + 1; start = time() + max_roc_scores = [[0, 0, 0]] * 6 + max_f1_scores = [[0, 0, 0]] * 6 + roc_scores = [] + f1_scores = [] + f1_max = 0 + roc_max = 0 + validation_thresh = 0 + # anomaly_scores = ['diffusion'] + #alpha = 0 + if not args.test_only: + for e in 
tqdm(list(range(epoch+1, epoch+num_epochs+1))): + train_loss, ae_loss, diff_loss = backprop(e, model, diffusion_training_net, diffusion_prediction_net, trainD, args.diff_lambda, optimizer, scheduler, training_mode, anomaly_score, args.k) + wandb.log({ + 'sum_loss_train': train_loss, + 'ae_loss_train': ae_loss, + 'diff_loss_train': diff_loss, + 'epoch': e + }, step=e) + if train_loss < 0.15: + loss0, _, _, val_loss, samples = backprop(e, model, diffusion_training_net, diffusion_prediction_net, validationD, args.diff_lambda, optimizer, scheduler, training_mode, args.anomaly_score, args.k, training=False) + wandb.log({'val_loss': loss0.mean(), 'epoch': e}, step=e) + loss0 = loss0.reshape(-1,feats) + lossFinal = np.mean(np.array(loss0), axis=1) + labelsFinal = (np.sum(validation_labels, axis=1) >= 1) + 0 + result, fprs, tprs = evaluate(lossFinal, labelsFinal) + result_roc = result["ROC/AUC"] + result_f1 = result["f1"] + wandb.log({'roc': result_roc, 'f1': result_f1}, step=e) + if result_f1 > f1_max: + save_model(None, experiment, diffusion_prediction_net, optimizer, None, -1, e, train_loss, None) + f1_max = result_f1 + validation_thresh = result['threshold'] + wandb.run.summary["best_f1"] = f1_max + wandb.run.summary["roc_for_best_f1"] = result_roc + wandb.run.summary["best_f1_epoch"] = e + wandb.run.summary["validation_thresh"] = validation_thresh + if result_roc > roc_max: + roc_max = result_roc + wandb.run.summary["f1_for_best_roc"] = result_f1 + wandb.run.summary["best_roc"] = roc_max + wandb.run.summary["best_roc_epoch"] = e + wandb.log({'roc': result_roc, 'f1': result_f1}, step=e) + if e % 100 == 0: + for dim in range(0, feats): + plotter(f'{experiment}_VAL', args.dataset, validationD.reshape(-1, feats), lossFinal, labelsFinal, result, None, samples.reshape(-1, feats), None, dim=dim, plot_test=True, epoch=e) + if args.v: + print(f"testing loss #{e}: {loss0.mean()}") + # print(f"training loss #{e}: {loss1.mean()}") + print(f"final ROC #{e}: {result_roc}") + 
print(f"F1 #{e}: {result_f1}") + + # TEST ON TEST SET + #load model from checkpoint + model, diffusion_training_net, diffusion_prediction_net, optimizer, scheduler = \ + load_model(training_mode ,args.lr, args.window_size, args.p1, args.p2, labels.shape[1], args.batch_size, args.noise_steps, args.denoise_steps) + model, diffusion_training_net = load_from_checkpoint(training_mode, experiment, model, diffusion_training_net) + if model: + model = model.to(device) + + diffusion_training_net = diffusion_training_net.to(device) + diffusion_prediction_net = diffusion_prediction_net.to(device) + # pass test set through the model + if model: + if args.test_only: + #test again on val for double check + get best thresh on validation set to use for test + loss0, val_loss, ae_loss_val, diff_loss_val, samples, recons = backprop(e, model, diffusion_training_net, diffusion_prediction_net, validationD, args.diff_lambda, optimizer, scheduler, training_mode, args.anomaly_score, args.k, training=False) + loss0 = loss0.reshape(-1,feats) + + lossFinal = np.mean(np.array(loss0), axis=1) + # lossFinal = np.max(np.array(loss0), axis=1) + labelsFinal = (np.sum(validation_labels, axis=1) >= 1) + 0 + + result, fprs, tprs = evaluate(lossFinal, labelsFinal) + validation_thresh = result['threshold'] + result_roc = result["ROC/AUC"] + result_f1 = result["f1"] + wandb.run.summary["f1_val"] = result_f1 + wandb.run.summary["roc_val"] = result_roc + wandb.run.summary["f1_pa_val"] = result['f1_max'] + wandb.run.summary["roc_pa_val"] = result['roc_max'] + wandb.run.summary["val_loss"] = val_loss + wandb.run.summary["ae_loss_val"] = ae_loss_val + wandb.run.summary["diff_loss_val"] = diff_loss_val + + # for dim in range(0, feats): + # fig = plotter(f'{experiment}_VAL', args.anomaly_score, validationD.reshape(-1, feats), lossFinal, labelsFinal, result, recons.reshape(-1, feats), samples.reshape(-1, feats), None, dim=dim, plot_test=True, epoch=e) + + loss0, test_loss, ae_loss_test, diff_loss_test, samples, 
recons = backprop(e, model, diffusion_training_net, diffusion_prediction_net, testD, args.diff_lambda, optimizer, scheduler, training_mode, args.anomaly_score, args.k, training=False) + loss0 = loss0.reshape(-1,feats) + + lossFinal = np.mean(np.array(loss0), axis=1) + # np.save(f'{args.dataset}_{args.anomaly_score}_score_scores.npy', lossFinal) + # np.save(f'{args.dataset}_{args.anomaly_score}_score_recons.npy', samples) + # # np.save('/root/Diff-Anomaly/TranAD/plots_for_paper/shapelet_scores_for_example.npy', lossFinal) + # lossFinal = np.max(np.array(loss0), axis=1) + labelsFinal = (np.sum(labels, axis=1) >= 1) + 0 + #validation_thresh = 0.0019 + result = evaluate(lossFinal, labelsFinal) + result_roc = result["ROC/AUC"] + result_f1 = result["f1"] + wandb.run.summary["f1_test"] = result_f1 + wandb.run.summary["roc_test"] = result_roc + wandb.run.summary["f1_pa_test"] = result['f1_max'] + #wandb.run.summary["roc_pa_test"] = result['roc_max'] + wandb.run.summary["test_loss"] = test_loss + wandb.run.summary["ae_loss_test"] = ae_loss_test + wandb.run.summary["diff_loss_test"] = diff_loss_test + wandb.run.summary["validation_thresh"] = validation_thresh + + #for dim in range(0, feats): + # fig = plotter(f'{experiment}_TEST', args.anomaly_score, testD.reshape(-1, feats), lossFinal, labelsFinal, result, recons.reshape(-1, feats), samples.reshape(-1, feats), None, dim=dim, plot_test=True, epoch=e) + + else: + if args.test_only: + loss0, _, _, val_loss, samples = backprop(e, model, diffusion_training_net, diffusion_prediction_net, validationD, args.diff_lambda, optimizer, scheduler, training_mode, args.anomaly_score, args.k, training=False) + loss0 = loss0.reshape(-1,feats) + + lossFinal = np.mean(np.array(loss0), axis=1) + labelsFinal = (np.sum(validation_labels, axis=1) >= 1) + 0 + + result, fprs, tprs = evaluate(lossFinal, labelsFinal) + result_roc = result["ROC/AUC"] + result_f1 = result["f1"] + validation_thresh = result['threshold'] + wandb.run.summary["f1_val"] = 
result_f1 + wandb.run.summary["roc_val"] = result_roc + wandb.run.summary["f1_pa_val"] = result['f1_max'] + #wandb.run.summary["roc_pa_val"] = result['roc_max'] + wandb.run.summary["val_loss"] = val_loss + wandb.run.summary["validation_thresh"] = validation_thresh + #for dim in range(0, feats): + # plotter(f'{experiment}_VAL', args.dataset, validationD.reshape(-1, feats), lossFinal, labelsFinal, result, None, samples.reshape(-1, feats), None, dim=dim, plot_test=True, epoch=e) + loss0, _, _, test_loss, samples = backprop(e, model, diffusion_training_net, diffusion_prediction_net, testD, args.diff_lambda, optimizer, scheduler, training_mode, args.anomaly_score, args.k, training=False) + loss0 = loss0.reshape(-1,feats) + + lossFinal = np.mean(np.array(loss0), axis=1) + # np.save(f'{args.dataset}_diff_only_scores.npy', lossFinal) + # np.save(f'{args.dataset}_diff_only_recons.npy', samples) + # labelsFinal = (np.sum(labels, axis=1) >= 1) + 0 + + result = evaluate(lossFinal, labelsFinal, validation_thresh=validation_thresh) + result_roc = result["ROC/AUC"] + result_f1 = result["f1"] + #for dim in range(0, feats): + # plotter(f'{experiment}_TEST', args.dataset, testD.reshape(-1, feats), lossFinal, labelsFinal, result, None, samples.reshape(-1, feats), None, dim=dim, plot_test=True, epoch=e) + wandb.run.summary["f1_test"] = result_f1 + wandb.run.summary["roc_test" ] = result_roc + wandb.run.summary["f1_pa_test"] = result['f1_max'] + wandb.run.summary["roc_pa_test"] = result['roc_max'] + wandb.run.summary["test_loss"] = test_loss + + wandb.finish() diff --git a/subject1-4/AdaDiff/unet2.py b/subject1-4/AdaDiff/unet2.py new file mode 100644 index 0000000000000000000000000000000000000000..5a798f90bf262c1fc0aa04ef4524a9991ea39efc --- /dev/null +++ b/subject1-4/AdaDiff/unet2.py @@ -0,0 +1,399 @@ +import math +from inspect import isfunction +from functools import partial + +import matplotlib.pyplot as plt +from tqdm.auto import tqdm +from einops import rearrange, reduce +from 
def num_to_groups(num, divisor):
    """Split ``num`` into chunks of size ``divisor`` plus an optional remainder.

    Args:
        num: Total amount to distribute.
        divisor: Size of every full chunk.

    Returns:
        A list holding ``num // divisor`` copies of ``divisor``; when
        ``num % divisor`` is positive it is appended as the final element.
    """
    full_chunks, leftover = divmod(num, divisor)
    # Only a strictly positive remainder gets its own trailing chunk.
    tail = [leftover] if leftover > 0 else []
    return [divisor] * full_chunks + tail
class SinusoidalPositionEmbeddings(nn.Module):
    """Map a 1-D batch of timesteps to concatenated sine/cosine features.

    The output has ``2 * (dim // 2)`` features per timestep: the sine half
    followed by the cosine half. For an odd ``dim`` that is ``dim - 1``.
    """

    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, time):
        """Return the embedding for ``time``.

        Args:
            time: Tensor indexed as ``time[:, None]`` (one value per batch item).

        Returns:
            Tensor of shape ``(len(time), 2 * (self.dim // 2))``.
        """
        n_freqs = self.dim // 2
        # Geometric frequency ladder; max(..., 1) avoids dividing by zero
        # when there is only a single frequency.
        log_step = math.log(10000) / max((n_freqs - 1), 1)
        freqs = torch.exp(torch.arange(n_freqs, device=time.device) * -log_step)
        angles = time[:, None] * freqs[None, :]
        return torch.cat((angles.sin(), angles.cos()), dim=-1)
class ResnetBlock(nn.Module):
    """Two stacked ``Block`` layers with a residual connection.

    Reference: https://arxiv.org/abs/1512.03385

    When ``time_emb_dim`` is given, a SiLU + Linear head turns the time
    embedding into a (scale, shift) pair that modulates the first block only.
    """

    def __init__(self, dim, dim_out, *, time_emb_dim=None, groups=8):
        super().__init__()

        # Submodules are created in the same order as before so that
        # parameter initialisation is unchanged.
        if time_emb_dim is not None:
            # Emits 2 * dim_out values: one half is the scale, the other the shift.
            self.mlp = nn.Sequential(nn.SiLU(), nn.Linear(time_emb_dim, dim_out * 2))
        else:
            self.mlp = None

        self.block1 = Block(dim, dim_out, groups=groups)
        self.block2 = Block(dim_out, dim_out, groups=groups)

        # A 1x1 conv aligns channel counts on the skip path when they differ.
        if dim != dim_out:
            self.res_conv = nn.Conv2d(dim, dim_out, 1)
        else:
            self.res_conv = nn.Identity()

    def forward(self, x, time_emb=None):
        """Apply both blocks and add the (possibly projected) input.

        Args:
            x: Input feature map.
            time_emb: Optional time embedding of shape ``(b, c)``; ignored
                when the block was built without ``time_emb_dim``.
        """
        modulation = None
        if self.mlp is not None and time_emb is not None:
            projected = rearrange(self.mlp(time_emb), "b c -> b c 1 1")
            modulation = projected.chunk(2, dim=1)

        hidden = self.block2(self.block1(x, scale_shift=modulation))
        return hidden + self.res_conv(x)
class Unet(nn.Module):
    """Time-conditioned U-Net built from ResNet blocks and attention layers.

    The encoder applies, per resolution, two ``ResnetBlock`` layers, a
    residual ``LinearAttention`` and a ``Downsample`` (a 3x3 conv on the last
    level). The decoder mirrors it, concatenating the stored encoder
    activations before each ``ResnetBlock`` and resizing with ``Upsample`` to
    the spatial sizes recorded while building the encoder.

    Args:
        dim: Base channel width; level widths are ``dim * m`` for each ``m``
            in ``dim_mults``. The time-embedding width is ``dim * 4``.
        init_size: Size of one input sample, indexed as ``[1]`` (height) and
            ``[2]`` (width) to derive the spatial size of every level. It is
            read whenever ``dim_mults`` has more than one entry.
        init_dim: Channel count after the stem conv; defaults to ``dim``.
        out_dim: Output channel count; defaults to ``channels``.
        dim_mults: Per-level multipliers applied to ``dim``.
        channels: Number of input channels.
        self_condition: When true the input is concatenated with
            ``x_self_cond`` (zeros if not supplied), doubling input channels.
        resnet_block_groups: ``groups`` argument forwarded to ``ResnetBlock``.
    """

    def __init__(
        self,
        dim,
        init_size=None,
        init_dim=None,
        out_dim=None,
        dim_mults=(1, 2, 4),
        channels=3,
        self_condition=False,
        resnet_block_groups=4,
    ):
        super().__init__()

        # determine dimensions
        self.init_size = init_size
        self.channels = channels
        self.self_condition = self_condition
        input_channels = channels * (2 if self_condition else 1)

        init_dim = default(init_dim, dim)
        # Stem is a 1x1 conv (changed to kernel 1 / padding 0 from 7 / 3).
        self.init_conv = nn.Conv2d(input_channels, init_dim, 1, padding=0)

        dims = [init_dim, *map(lambda m: dim * m, dim_mults)]
        in_out = list(zip(dims[:-1], dims[1:]))

        block_klass = partial(ResnetBlock, groups=resnet_block_groups)

        # time embeddings
        time_dim = dim * 4
        # SinusoidalPositionEmbeddings concatenates a sin and a cos half of
        # width dim // 2 each, so it emits 2 * (dim // 2) features. That is
        # dim - 1 for odd dim (the previous hard-coded value) and dim for
        # even dim, which previously failed with a shape mismatch.
        time_emb_width = 2 * (dim // 2)

        self.time_mlp = nn.Sequential(
            SinusoidalPositionEmbeddings(dim),
            nn.Linear(time_emb_width, time_dim),
            nn.GELU(),
            nn.Linear(time_dim, time_dim),
        )

        # layers
        self.downs = nn.ModuleList([])
        self.ups = nn.ModuleList([])
        num_resolutions = len(in_out)
        sampling_sizes = []

        for ind, (dim_in, dim_out) in enumerate(in_out):
            is_last = ind >= (num_resolutions - 1)
            # Each Downsample pads odd sides to even and halves them, so the
            # next level's spatial size is the rounded-up half.
            size = init_size if ind == 0 else torch.Size(
                [
                    dim_in,
                    (last_size[1] + last_size[1] % 2) // 2,
                    (last_size[2] + last_size[2] % 2) // 2,
                ]
            )
            last_size = size
            sampling_sizes.append((size, dim_out))
            self.downs.append(
                nn.ModuleList(
                    [
                        block_klass(dim_in, dim_in, time_emb_dim=time_dim),
                        block_klass(dim_in, dim_in, time_emb_dim=time_dim),
                        Residual(PreNorm(dim_in, LinearAttention(dim_in))),
                        Downsample(dim_in, dim_out, size)
                        if not is_last
                        else nn.Conv2d(dim_in, dim_out, 3, padding=1),
                    ]
                )
            )

        mid_dim = dims[-1]
        self.mid_block1 = block_klass(mid_dim, mid_dim, time_emb_dim=time_dim)
        self.mid_attn = Residual(PreNorm(mid_dim, Attention(mid_dim)))
        self.mid_block2 = block_klass(mid_dim, mid_dim, time_emb_dim=time_dim)

        # Drop the deepest level (it is not downsampled) so that entry ``ind``
        # is the size the ``ind``-th decoder stage must upsample to.
        sampling_sizes = list(reversed(sampling_sizes))[1:]
        for ind, (dim_in, dim_out) in enumerate(reversed(in_out)):
            is_last = ind == (len(in_out) - 1)

            self.ups.append(
                nn.ModuleList(
                    [
                        block_klass(dim_out + dim_in, dim_out, time_emb_dim=time_dim),
                        block_klass(dim_out + dim_in, dim_out, time_emb_dim=time_dim),
                        Residual(PreNorm(dim_out, LinearAttention(dim_out))),
                        Upsample(dim_out, dim_in, sampling_sizes[ind])
                        if not is_last
                        else nn.Conv2d(dim_out, dim_in, 3, padding=1),
                    ]
                )
            )

        self.out_dim = default(out_dim, channels)

        # The decoder ends with init_dim channels and is concatenated with the
        # stem output (also init_dim channels), so the final block takes
        # 2 * init_dim inputs. Using dim * 2 only worked when init_dim == dim.
        self.final_res_block = block_klass(init_dim * 2, dim, time_emb_dim=time_dim)
        self.final_conv = nn.Conv2d(dim, self.out_dim, 1)

    def forward(self, x, time, x_self_cond=None):
        """Run the network.

        Args:
            x: Input batch with ``channels`` channels.
            time: Per-sample timesteps fed to the time-embedding MLP.
            x_self_cond: Optional self-conditioning tensor shaped like ``x``;
                only used when ``self_condition`` is enabled, where a missing
                value is replaced by zeros.

        Returns:
            Tensor with ``out_dim`` channels produced by the final 1x1 conv.
        """
        if self.self_condition:
            x_self_cond = default(x_self_cond, lambda: torch.zeros_like(x))
            x = torch.cat((x_self_cond, x), dim=1)

        x = self.init_conv(x)
        r = x.clone()  # stem output, re-joined just before the final block

        t = self.time_mlp(time)

        h = []  # stack of encoder activations consumed by the decoder

        for block1, block2, attn, downsample in self.downs:
            x = block1(x, t)
            h.append(x)

            x = block2(x, t)
            x = attn(x)
            h.append(x)

            x = downsample(x)

        x = self.mid_block1(x, t)
        x = self.mid_attn(x)
        x = self.mid_block2(x, t)

        for block1, block2, attn, upsample in self.ups:
            x = torch.cat((x, h.pop()), dim=1)
            x = block1(x, t)

            x = torch.cat((x, h.pop()), dim=1)
            x = block2(x, t)
            x = attn(x)

            x = upsample(x)

        x = torch.cat((x, r), dim=1)

        x = self.final_res_block(x, t)
        return self.final_conv(x)
"linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + }, + "test": { + "schedule": "linear", + "start_label": 1, + "end_label": 2001, + "step_label": 1, + "step_t": 1000, + "n_timestep": 100, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 128, + "channels": 1, + "conditional": true + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/config/msl_time_train.json b/subject1-4/dynamicSplit/02DiffAD-main_high/config/msl_time_train.json new file mode 100644 index 0000000000000000000000000000000000000000..8440faa95a4a4e4ff00fc9a39b5b13e4c845eaf6 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/config/msl_time_train.json @@ -0,0 +1,80 @@ +{ + "name": "MSL_TRAIN", + "phase": "train", + "gpu_ids": [ + 1, + 3 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": null + }, + "datasets": { + "train": { + "name": "msl_train", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/msl/msl_train.csv", + "datatype": "time", + "l_resolution": 16, + "r_resolution": 128, + "batch_size": 32, + "num_workers": 4, + "use_shuffle": false, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 128, + "channels": 1, + "conditional": true + } + }, + "train": { + "n_epoch": 100, + "val_freq": 100, + "save_checkpoint_freq": 100, + "print_freq": 10, + "optimizer": { + "type": "adam", + "lr": 3e-6 + }, + "ema_scheduler": { + "step_start_ema": 5000, + 
"update_ema_every": 1, + "ema_decay": 0.9999 + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/config/psm_time_test.json b/subject1-4/dynamicSplit/02DiffAD-main_high/config/psm_time_test.json new file mode 100644 index 0000000000000000000000000000000000000000..a78f9daf724cf1e414115df89a1f06b4160272ea --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/config/psm_time_test.json @@ -0,0 +1,71 @@ +{ + "name": "PSM_TEST", + "phase": "test", + "gpu_ids": [ + 0 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": "experiments/PSM_TRAIN_16_128_100/checkpoint/E20" + }, + "datasets": { + "test": { + "name": "psm_test", + "mode": "HR", + "dataroot": "tf_dataset/psm/psm_test.csv", + "datatype": "time", + "l_resolution": 16, + "r_resolution": 128, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 100, + "linear_start": 1e-6, + "linear_end": 1e-2 + }, + "test": { + "schedule": "linear", + "start_label": 1, + "end_label": 2001, + "step_label": 1, + "step_t": 1000, + "n_timestep": 100, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 128, + "channels": 1, + "conditional": true + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/config/psm_time_train.json b/subject1-4/dynamicSplit/02DiffAD-main_high/config/psm_time_train.json new file mode 100644 index 0000000000000000000000000000000000000000..c9cd489fd11c86ae208fddd76de65ede3aaecacc --- /dev/null +++ 
b/subject1-4/dynamicSplit/02DiffAD-main_high/config/psm_time_train.json @@ -0,0 +1,80 @@ +{ + "name": "PSM_TRAIN", + "phase": "train", + "gpu_ids": [ + 0, + 1 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": null + }, + "datasets": { + "train": { + "name": "psm_train", + "mode": "HR", + "dataroot": "tf_dataset/psm/psm_train.csv", + "datatype": "time", + "l_resolution": 16, + "r_resolution": 128, + "batch_size": 32, + "num_workers": 4, + "use_shuffle": false, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 100, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 128, + "channels": 1, + "conditional": true + } + }, + "train": { + "n_epoch": 100, + "val_freq": 100, + "save_checkpoint_freq": 10, + "print_freq": 10, + "optimizer": { + "type": "adam", + "lr": 3e-6 + }, + "ema_scheduler": { + "step_start_ema": 5000, + "update_ema_every": 1, + "ema_decay": 0.9999 + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/config/smap_time_test.json b/subject1-4/dynamicSplit/02DiffAD-main_high/config/smap_time_test.json new file mode 100644 index 0000000000000000000000000000000000000000..c73f7135dddd2ded804a915ee34a58d7c8698f84 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/config/smap_time_test.json @@ -0,0 +1,71 @@ +{ + "name": "SMAP_TEST", + "phase": "test", + "gpu_ids": [ + 0 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": 
"//experiments/SMAP_TRAIN_128_2048_20/checkpoint/E100" + }, + "datasets": { + "test": { + "name": "smap_test", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/smap/smap_test.csv", + "datatype": "time", + "l_resolution": 128, + "r_resolution": 2048, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + }, + "test": { + "schedule": "linear", + "start_label": 1, + "end_label": 3001, + "step_label": 1, + "step_t": 1000, + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 2048, + "channels": 1, + "conditional": true + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/config/smap_time_train.json b/subject1-4/dynamicSplit/02DiffAD-main_high/config/smap_time_train.json new file mode 100644 index 0000000000000000000000000000000000000000..d5d19094abb82daad489a68aee140b9919f832f4 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/config/smap_time_train.json @@ -0,0 +1,80 @@ +{ + "name": "SMAP_TRAIN", + "phase": "train", + "gpu_ids": [ + 1, + 3 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": null + }, + "datasets": { + "train": { + "name": "smap_train", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/smap/smap_train.csv", + "datatype": "time", + "l_resolution": 128, + "r_resolution": 2048, + "batch_size": 32, + "num_workers": 4, + "use_shuffle": false, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { 
+ "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 2048, + "channels": 1, + "conditional": true + } + }, + "train": { + "n_epoch": 100, + "val_freq": 100, + "save_checkpoint_freq": 10, + "print_freq": 10, + "optimizer": { + "type": "adam", + "lr": 3e-6 + }, + "ema_scheduler": { + "step_start_ema": 5000, + "update_ema_every": 1, + "ema_decay": 0.9999 + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/config/smd_time_test.json b/subject1-4/dynamicSplit/02DiffAD-main_high/config/smd_time_test.json new file mode 100644 index 0000000000000000000000000000000000000000..0d93cbe1f696086a3b6b85e5beed3368d609a8c6 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/config/smd_time_test.json @@ -0,0 +1,71 @@ +{ + "name": "SMD_TEST", + "phase": "test", + "gpu_ids": [ + 2 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": "//experiments/SMD_TRAIN_128_2048_20/checkpoint/E100" + }, + "datasets": { + "test": { + "name": "smd_test", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/smd/smd_test.csv", + "datatype": "time", + "l_resolution": 128, + "r_resolution": 2048, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + }, + 
"test": { + "schedule": "linear", + "start_label": 1, + "end_label": 1001, + "step_label": 1, + "step_t": 1000, + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 2048, + "channels": 1, + "conditional": true + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/config/smd_time_train.json b/subject1-4/dynamicSplit/02DiffAD-main_high/config/smd_time_train.json new file mode 100644 index 0000000000000000000000000000000000000000..12e176f8e41c58c944f533031a80e70c84959a04 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/config/smd_time_train.json @@ -0,0 +1,80 @@ +{ + "name": "SMD_TRAIN", + "phase": "train", + "gpu_ids": [ + 0, + 1 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": null + }, + "datasets": { + "train": { + "name": "smd_train", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/smd/smd_train.csv", + "datatype": "time", + "l_resolution": 128, + "r_resolution": 2048, + "batch_size": 8, + "num_workers": 4, + "use_shuffle": false, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 2048, + "channels": 1, + "conditional": true + } + }, + "train": { + "n_epoch": 100, + "val_freq": 100, + "save_checkpoint_freq": 100, + "print_freq": 10, + "optimizer": { + "type": "adam", + "lr": 3e-6 + }, + "ema_scheduler": { + "step_start_ema": 5000, + "update_ema_every": 1, + "ema_decay": 0.9999 + } + }, + "wandb": { + "project": 
"distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/logger.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/logger.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9a333c0111ab4934db4e3607384f137a62b48f4c Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/logger.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/logger.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/logger.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4b8153a73d945c91a502f976631af3d933396061 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/logger.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/logger.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/logger.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a8816259b8e18c7623868a4476e80ed31657e93 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/logger.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/metrics.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/metrics.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bfe4ce0ecb9a3aab8ed27d9654d55f361fff0b70 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/metrics.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/metrics.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/metrics.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2e33c5e984edf893136343a236cecfacbfd45385 Binary files 
/dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/metrics.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/metrics.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/metrics.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1cdad6bde4bcab4d4ddfbbbe7becf97cae141ac4 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/core/__pycache__/metrics.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/core/logger.py b/subject1-4/dynamicSplit/02DiffAD-main_high/core/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..672af1886ba88de1caa889d7d90b649e34279b73 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/core/logger.py @@ -0,0 +1,143 @@ +import json +import logging +import os +from collections import OrderedDict +from datetime import datetime + + +def mkdirs(paths): + if isinstance(paths, str): + os.makedirs(paths, exist_ok=True) + else: + for path in paths: + os.makedirs(path, exist_ok=True) + + +def get_timestamp(): + return datetime.now().strftime('%y%m%d_%H%M%S') + + +def parse(args, model_epoch=None): + phase = args.phase + opt_path = args.config + gpu_ids = args.gpu_ids + enable_wandb = args.enable_wandb + # remove comments starting with '//' + json_str = '' + with open(opt_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.split('//')[0] + '\n' + json_str += line + + opt = json.loads(json_str, object_pairs_hook=OrderedDict) + + # set log directory + if args.debug: + opt['name'] = 'debug_{}'.format(opt['name']) + if opt['phase'] == 'train': + experiments_root = os.path.join( + 'experiments', '{}_{}_{}_{}'.format(opt['name'], opt['datasets']['train']['l_resolution'], + opt['datasets']['train']['r_resolution'], + opt['model']['beta_schedule']['train']['n_timestep'])) + elif opt['phase'] == 'test': + experiments_root = os.path.join( + 
'experiments', '{}_{}_{}_{}_{}'.format(opt['name'], opt['datasets']['test']['l_resolution'], + opt['datasets']['test']['r_resolution'], + opt['model']['beta_schedule']['test']['n_timestep'], model_epoch)) + + opt['path']['experiments_root'] = experiments_root + for key, path in opt['path'].items(): + if 'resume' not in key and 'experiments' not in key: + opt['path'][key] = os.path.join(experiments_root, path) + mkdirs(opt['path'][key]) + + opt['phase'] = phase + + # export CUDA_VISIBLE_DEVICES + if gpu_ids is not None: + opt['gpu_ids'] = [int(id) for id in gpu_ids.split(',')] + gpu_list = gpu_ids + else: + gpu_list = ','.join(str(x) for x in opt['gpu_ids']) + os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list + print('export CUDA_VISIBLE_DEVICES=' + gpu_list) + + if len(gpu_list) > 1: + opt['distributed'] = True + else: + opt['distributed'] = False + + # debug + if 'debug' in opt['name']: + opt['train']['print_freq'] = 2 + opt['train']['save_checkpoint_freq'] = 3 + opt['datasets']['train']['batch_size'] = 2 + opt['model']['beta_schedule']['train']['n_timestep'] = 10 + opt['datasets']['train']['data_len'] = 6 + + # W&B Logging + try: + log_wandb_ckpt = args.log_wandb_ckpt + opt['log_wandb_ckpt'] = log_wandb_ckpt + except: + pass + try: + log_eval = args.log_eval + opt['log_eval'] = log_eval + except: + pass + try: + log_infer = args.log_infer + opt['log_infer'] = log_infer + except: + pass + opt['enable_wandb'] = enable_wandb + + return opt + + +class NoneDict(dict): + def __missing__(self, key): + return None + + +# convert to NoneDict, which return None for missing key. 
+def dict_to_nonedict(opt): + if isinstance(opt, dict): + new_opt = dict() + for key, sub_opt in opt.items(): + new_opt[key] = dict_to_nonedict(sub_opt) + return NoneDict(**new_opt) + elif isinstance(opt, list): + return [dict_to_nonedict(sub_opt) for sub_opt in opt] + else: + return opt + + +def dict2str(opt, indent_l=1): + '''dict to string for logger''' + msg = '' + for k, v in opt.items(): + if isinstance(v, dict): + msg += ' ' * (indent_l * 2) + k + ':[\n' + msg += dict2str(v, indent_l + 1) + msg += ' ' * (indent_l * 2) + ']\n' + else: + msg += ' ' * (indent_l * 2) + k + ': ' + str(v) + '\n' + return msg + + +def setup_logger(logger_name, root, phase, level=logging.INFO, screen=False): + '''set up logger''' + l = logging.getLogger(logger_name) + formatter = logging.Formatter( + '%(asctime)s.%(msecs)03d - %(levelname)s: %(message)s', datefmt='%y-%m-%d %H:%M:%S') + log_file = os.path.join(root, '{}.log'.format(phase)) + fh = logging.FileHandler(log_file, mode='w') + fh.setFormatter(formatter) + l.setLevel(level) + l.addHandler(fh) + if screen: + sh = logging.StreamHandler() + sh.setFormatter(formatter) + l.addHandler(sh) diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/core/metrics.py b/subject1-4/dynamicSplit/02DiffAD-main_high/core/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..2691ae949afc35227a77dbe1b582e5916ec238bf --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/core/metrics.py @@ -0,0 +1,169 @@ +import numpy as np +import pandas as pd +from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score, mean_squared_error + + +def squeeze_tensor(tensor): + return tensor.squeeze().cpu() + + +def update_csv_col_name(all_datas): + df = all_datas.copy() + df.columns = [0, 1, 2, 3] + + return df + + +def tensor2allcsv(visuals, col_num): + df = pd.DataFrame() + sr_df = pd.DataFrame(squeeze_tensor(visuals['SR'])) + ori_df = pd.DataFrame(squeeze_tensor(visuals['ORI'])) + lr_df = 
pd.DataFrame(squeeze_tensor(visuals['LR'])) + inf_df = pd.DataFrame(squeeze_tensor(visuals['INF'])) + + if col_num != 1: + for i in range(col_num, sr_df.shape[1]): + sr_df.drop(labels=i, axis=1, inplace=True) + ori_df.drop(labels=i, axis=1, inplace=True) + lr_df.drop(labels=i, axis=1, inplace=True) + inf_df.drop(labels=i, axis=1, inplace=True) + + df['SR'] = sr_df.mean(axis=1) + df['ORI'] = ori_df.mean(axis=1) + df['LR'] = lr_df.mean(axis=1) + df['INF'] = inf_df.mean(axis=1) + + df['differ'] = (ori_df - sr_df).abs().mean(axis=1) + df['label'] = squeeze_tensor(visuals['label']) + + differ_df = (sr_df - ori_df) + + return df, sr_df, differ_df + + +def merge_all_csv(all_datas, all_data): + all_datas = pd.concat([all_datas, all_data]) + return all_datas + + +def save_csv(data, data_path): + data.to_csv(data_path, index=False) + + +def get_mean(df): + mean = df['value'].astype('float32').mean() + normal_mean = df['value'][df['label'] == 0].astype('float32').mean() + anomaly_mean = df['value'][df['label'] == 1].astype('float32').mean() + + return mean, normal_mean, anomaly_mean + + +def get_val_mean(df): + mean_dict = {} + + ori = 'ORI' + ori_mean = df[ori].astype('float32').mean() + ori_normal_mean = df[ori][df['label'] == 0].astype('float32').mean() + ori_anomaly_mean = df[ori][df['label'] == 1].astype('float32').mean() + + gen_mean = df['SR'].astype('float32').mean() + gen_normal_mean = df['SR'][df['label'] == 0].astype('float32').mean() + gen_anomaly_mean = df['SR'][df['label'] == 1].astype('float32').mean() + + mean_dict['MSE'] = mean_squared_error(df[ori], df['SR']) + + mean_dict['ori_mean'] = ori_mean + mean_dict['ori_normal_mean'] = ori_normal_mean + mean_dict['ori_anomaly_mean'] = ori_anomaly_mean + + mean_dict['gen_mean'] = gen_mean + mean_dict['gen_normal_mean'] = gen_normal_mean + mean_dict['gen_anomaly_mean'] = gen_anomaly_mean + + mean_dict['mean_differ'] = ori_mean - gen_mean + mean_dict['normal_mean_differ'] = ori_normal_mean - gen_normal_mean + 
mean_dict['anomaly_mean_differ'] = ori_anomaly_mean - gen_anomaly_mean + + mean_dict['ori_no-ano_differ'] = ori_normal_mean - ori_anomaly_mean + mean_dict['ori_mean-no_differ'] = ori_mean - ori_normal_mean + mean_dict['ori_mean-ano_differ'] = ori_mean - ori_anomaly_mean + + mean_dict['gen_no-ano_differ'] = gen_normal_mean - gen_anomaly_mean + mean_dict['gen_mean-no_differ'] = gen_mean - gen_normal_mean + mean_dict['gen_mean-ano_differ'] = gen_mean - gen_anomaly_mean + + return mean_dict + + +def relabeling_strategy(df, params): + y_true = [] + best_N = 0 + best_f1 = -1 + best_thred = 0 + best_predictions = [] + thresholds = np.arange(params['start_label'], params['end_label'], params['step_label']) + + df_sort = df.sort_values(by="differ", ascending=False) + df_sort = df_sort.reset_index(drop=False) + + for t in thresholds: + # if (t - 1) % params['step_t'] == 0: + # print("t: ", t) + y_true, y_pred, thred = predict_labels(df_sort, t) + for i in range(len(y_true)): + if y_pred[i] == 1 and y_true[i] == 1: + j = i - 1 + while j >= 0 and y_true[j] == 1 and y_pred[j] == 0: + y_pred[j] = 1 + j -= 1 + j = i + 1 + while j < len(y_pred) and y_true[j] == 1 and y_pred[j] == 0: + y_pred[j] = 1 + j += 1 + + f1 = calculate_f1(y_true, y_pred) + if f1 > best_f1: + best_f1 = f1 + best_N = t + best_thred = thred + best_predictions = y_pred + + accuracy = calculate_accuracy(y_true, best_predictions) + precision = calculate_precision(y_true, best_predictions) + recall = calculate_recall(y_true, best_predictions) + + return best_f1,accuracy,precision,recall + + +def predict_labels(df_sort, num): + df_sort['pred_label'] = 0 + df_sort.loc[0:num - 1, 'pred_label'] = 1 + thred = df_sort.loc[num - 1, 'differ'] + + df_sort = df_sort.set_index('index') + df_sort = df_sort.sort_index() + + y_true = df_sort['label'].tolist() + y_pred = df_sort['pred_label'].tolist() + + return y_true, y_pred, thred + + +def calculate_accuracy(y_true, y_pred): + accuracy = accuracy_score(y_true, y_pred) + 
return accuracy + + +def calculate_precision(y_true, y_pred): + precision = precision_score(y_true, y_pred) + return precision + + +def calculate_recall(y_true, y_pred): + recall = recall_score(y_true, y_pred) + return recall + + +def calculate_f1(y_true, y_pred): + f1 = f1_score(y_true, y_pred) + return f1 diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/LRHR_dataset.py b/subject1-4/dynamicSplit/02DiffAD-main_high/data/LRHR_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..b22f92136b774244541be2fbf50193d89e6e1a35 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/data/LRHR_dataset.py @@ -0,0 +1,43 @@ +from torch.utils.data import Dataset + +from data.prepare_time_data import PrepareTimeData + + +class LRHRDataset(Dataset): + def __init__(self, dataroot, datatype, phase, l_resolution=16, r_resolution=128, + split='train', data_len=-1, need_LR=False): + self.datatype = datatype + self.data_len = data_len + self.need_LR = need_LR + self.split = split + self.phase = phase + self.pre_data = PrepareTimeData(data_path=dataroot, phase=phase, base=l_resolution, size=r_resolution) + self.row_num = self.pre_data.get_row_num() + self.col_num = self.pre_data.get_col_num() + + if datatype == 'time': + self.hr_path, self.sr_path, self.labels, self.pre_labels = self.pre_data.get_sr_data() + self.dataset_len = len(self.sr_path) + if self.data_len <= 0: + self.data_len = self.dataset_len + else: + self.data_len = min(self.data_len, self.dataset_len) + else: + raise NotImplementedError( + 'data_type [{:s}] is not recognized.'.format(datatype)) + + def __len__(self): + return self.data_len + + def __getitem__(self, index): + + data_LR = None + data_ORI = self.hr_path[index] + data_HR = self.hr_path[index] + data_SR = self.sr_path[index] + data_label = self.labels[index] + + if self.phase == 'train': + return {'HR': data_HR, 'SR': data_SR, 'Index': index} + else: + return {'ORI': data_ORI, 'HR': data_HR, 'SR': data_SR, 'label': 
data_label, 'Index': index} diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__init__.py b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c915ff01b1a408c16e40fb1c5ce6eda3df0e9bc --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__init__.py @@ -0,0 +1,42 @@ +'''create dataset and dataloader''' +import logging + +import torch.utils.data + + +def create_dataloader(dataset, dataset_opt, phase): + '''create dataloader ''' + if phase == 'train': + return torch.utils.data.DataLoader( + dataset, + batch_size=dataset_opt['batch_size'], + shuffle=dataset_opt['use_shuffle'], + num_workers=dataset_opt['num_workers'], + pin_memory=True) + elif phase == 'val': + return torch.utils.data.DataLoader( + dataset, batch_size=1, shuffle=False, num_workers=1, pin_memory=True) + elif phase == 'test': + return torch.utils.data.DataLoader( + dataset, batch_size=1, shuffle=False, num_workers=1, pin_memory=True) + else: + raise NotImplementedError( + 'Dataloader [{:s}] is not found.'.format(phase)) + + +def create_dataset(dataset_opt, phase): + '''create dataset''' + mode = dataset_opt['mode'] + from data.LRHR_dataset import LRHRDataset as D + dataset = D(dataroot=dataset_opt['dataroot'], + datatype=dataset_opt['datatype'], + l_resolution=dataset_opt['l_resolution'], + r_resolution=dataset_opt['r_resolution'], + split=phase, + data_len=dataset_opt['data_len'], + need_LR=(mode == 'LR'), + phase=phase) + logger = logging.getLogger('base') + logger.info('Dataset [{:s} - {:s}] is created.'.format(dataset.__class__.__name__, + dataset_opt['name'])) + return dataset diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/LRHR_dataset.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/LRHR_dataset.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d6f440b01f784415869d311f231f82cc79786cb9 Binary files /dev/null 
and b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/LRHR_dataset.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/LRHR_dataset.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/LRHR_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cbe04557d59ecd665ab5936d0f456f5392e2bbb7 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/LRHR_dataset.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/LRHR_dataset.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/LRHR_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e39c78c33598a1ab6c337e134f89dfec4d299be Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/LRHR_dataset.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/__init__.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8eb84ea015c91d03a6b9af293bb90b7ce97b8a33 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/__init__.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/__init__.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd57e7845ba5e7c82b9578c15d89c58752c37872 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/__init__.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/__init__.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..290fea552665c003812613a8cd173fc4557cbf5e Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/__init__.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/prepare_time_data.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/prepare_time_data.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b140cae6c826e507ce230de76177672c9473ca8c Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/prepare_time_data.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/prepare_time_data.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/prepare_time_data.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8bb9ef515c71780a74d445b58d889b8fbb557792 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/prepare_time_data.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/prepare_time_data.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/prepare_time_data.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..512dafab3324b460ed69a511d6cb0b5e2e15e974 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/data/__pycache__/prepare_time_data.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/data/prepare_time_data.py b/subject1-4/dynamicSplit/02DiffAD-main_high/data/prepare_time_data.py new file mode 100644 index 0000000000000000000000000000000000000000..646f0c8ad6766994a3ba2174d5cd2eef85b9ddc4 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/data/prepare_time_data.py @@ -0,0 +1,355 @@ +import math +import warnings + +import numpy as np +import pandas as pd +import torch + 
+warnings.filterwarnings("ignore") + + +class PrepareTimeData: + def __init__(self, data_path, phase, base, size): + self.data_path = data_path + self.phase = phase + self.base = base + self.size = size + + self.data_name = self.data_path.split('/')[-1].split('_')[0] + self.read_dataset(self.data_path, self.data_name) + self.df = self.ori_df.copy() + self.row_num = self.ori_df.shape[0] + self.col_num = self.ori_df.shape[1] + self.mean = self.df.mean(axis=1) + + self.df = self.get_mean_df(self.df) + self.df = self.vertical_merge_df(self.df) + self.df = self.join_together_labels(self.df) + self.df = self.fill_data(self.df) + self.df = self.standardize_data(self.df) + + def get_hr_data(self): + df = self.df.copy() + ori_values, values, labels, pre_labels = self.get_data_by_interval(df) + + return ori_values, values, labels, pre_labels + + def get_sr_data(self): + df = self.df.copy() + ori_values, values, labels, pre_labels = self.get_data_by_insert_normal() + + return ori_values, values, labels, pre_labels + + def get_mean_df(self, df): + df = df.copy() + for col in df.columns: + df[col] = self.mean + return df + + def vertical_merge_df(self, df): + df = df.copy() + two_power = 2 + + if self.col_num < 16: + two_power = 16 + df_temp = pd.DataFrame() + col_count = 0 + for i in range(two_power - self.col_num): + if col_count >= self.col_num: + col_count = 0 + df_temp[i] = df.iloc[:, col_count] + col_count = col_count + 1 + else: + while self.col_num > two_power: + two_power = two_power * 2 + df_temp = df.iloc[:, 0:(two_power - self.col_num)] + + col_name = [] + for i in range(self.col_num): + col_name.append('value_' + str(i)) + + df.columns = col_name + col_name = [] + for i in range(self.col_num, two_power): + col_name.append('value_' + str(i)) + + df_temp.columns = col_name + df = pd.concat([df, df_temp], axis=1) + return df + + def join_together_labels(self, df): + df = df.copy() + + if self.phase == 'train': + df['label'] = 0 + else: + df['label'] = self.test_labels 
+ return df + + def fill_data(self, df): + df = df.copy() + data_end = math.ceil(self.row_num / self.size) * self.size + + for i in range(self.row_num, data_end): + df = df._append(pd.Series(), ignore_index=True) + + df.fillna(0, inplace=True) + return df + + def read_dataset(self, data_path, data_name): + if data_name.upper().find('MSL') != -1: + cols = [-1] + self.get_dataset(data_path, cols) + elif data_name.upper().find('PSM') != -1: + if self.phase == 'train': + cols = [-1] + self.get_dataset(data_path, cols) + if self.ori_df.columns.__contains__('timestamp_(min)'): + self.ori_df.drop(columns=['timestamp_(min)'], inplace=True) + else: + cols = [-1] + self.get_dataset(data_path, cols) + if self.ori_df.columns.__contains__('timestamp_(min)'): + self.ori_df.drop(columns=['timestamp_(min)'], inplace=True) + self.test_labels.drop(columns=['timestamp_(min)'], inplace=True) + elif data_name.upper().find('SMAP') != -1: + cols = [0, 1, 2, 3, 4, 7, 8, 9, 10, 12, 13, 15, 16, 19, 20] + self.get_dataset(data_path, cols) + elif data_name.upper().find('SMD') != -1: + cols = [0, 1, 3, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 28, 33, 35, 36, 37] + self.get_dataset(data_path, cols) + + def get_dataset(self, data_path, cols): + if self.phase == 'train': + if -1 in cols: + self.ori_df = pd.read_csv(data_path) + else: + self.ori_df = pd.read_csv(data_path, usecols=cols) + else: + if -1 in cols: + self.ori_df = pd.read_csv(data_path) + else: + self.ori_df = pd.read_csv(data_path, usecols=cols) + + test_label_path = self.data_path.replace('_test.csv', '_test_label.csv') + self.test_labels = pd.read_csv(test_label_path) + + def get_data_by_insert_normal(self): + df = pd.DataFrame(columns=['value', 'label']) + df['value'] = self.df['value_0'] + df['label'] = self.df['label'] + + df_pre_label = self.mutation_point(df) + insert_datas = self.insert_normal(df_pre_label) + + ori_values = [] + values = [] + labels = [] + pre_labels = [] + + start_index = 0 + end_index = self.size 
+ + for col in self.df.columns: + if col == 'label': + continue + self.df[col] = insert_datas['value'] + self.df['pre_label'] = insert_datas['pre_label'] + + ori_df = self.vertical_merge_df(self.ori_df) + ori_df = self.fill_data(ori_df) + + for i in range(0, self.df.shape[0], self.size): + insert_data = pd.DataFrame() + ori_value = pd.DataFrame() + + insert_data = pd.concat([insert_data, self.df[start_index: end_index]]) + ori_value = pd.concat([ori_value, ori_df[start_index: end_index]]) + start_index += self.size + end_index += self.size + + value = insert_data.copy().drop(['label', 'pre_label'], axis=1) + label = insert_data['label'] + pre_label = insert_data['pre_label'] + + value = torch.tensor(np.array(value).astype(np.float32)) + label = torch.tensor(np.array(label).astype(np.int64)) + pre_label = torch.tensor(np.array(pre_label).astype(np.int64)) + ori_value = torch.tensor(np.array(ori_value).astype(np.float32)) + + values.append(value.unsqueeze(0)) + labels.append(label) + pre_labels.append(pre_label) + ori_values.append(ori_value.unsqueeze(0)) + + return ori_values, values, labels, pre_labels + + def standardize_data(self, df): + df = df.copy() + name = self.data_path.split('.csv')[0] + print(name, "Points: {}".format(self.row_num)) + df = self.complete_value(df) + + if self.phase != 'train': + anomaly_len = len(df[df['label'] == 1].index.tolist()) + print("Labeled anomalies: {}".format(anomaly_len)) + + return df + + def complete_value(self, df): + + df.fillna(0, inplace=True) + return df + + def get_mutation_point(self, df_pre_label, start_index, end_index, last_size_var): + size_var = df_pre_label['value'][start_index: end_index].var() + label_count = len(df_pre_label[start_index: end_index][df_pre_label['label'] == 1].index.tolist()) + + if last_size_var == 0: + times = 'Nan' + else: + times = size_var / last_size_var + if times < 1 and times != 0: + times = 1 / times + + if times != "Nan" and times >= 10: + df_pre_label['pre_label'][start_index: 
end_index] = 1 + else: + df_pre_label['pre_label'][start_index: end_index] = 0 + + return size_var + + def mutation_point(self, df): + df_pre_label = df.copy() + df_pre_label['pre_label'] = 0 + + size = 128 + start_index = 0 + end_index = size + all_var = df_pre_label['value'].var() + + last_size_var = 0 + for i in range(int(self.row_num / size)): + last_size_var = self.get_mutation_point(df_pre_label, start_index, end_index, last_size_var) + + start_index += size + end_index += size + + self.get_mutation_point(df_pre_label, start_index, self.row_num - 1, last_size_var) + return df_pre_label + + def get_index(self, indexes): + count = 0 + start_indexes = [] + end_indexes = [] + + if len(indexes) != 0: + count = count + 1 + start_indexes.append(indexes[0]) + + for i in range(1, len(indexes)): + if indexes[i - 1] + 1 != indexes[i]: + count = count + 1 + end_indexes.append(indexes[i - 1]) + start_indexes.append(indexes[i]) + + end_indexes.append(indexes[len(indexes) - 1]) + + return start_indexes, end_indexes, count + + def insert_normal(self, data): + pre_labels = 'pre_label' + + nor_indexes = data[0:self.row_num][data[pre_labels] == 0].index.tolist() + ano_indexes = data[0:self.row_num][data[pre_labels] == 1].index.tolist() + + nor_start_indexes, nor_end_indexes, nor_count = self.get_index(nor_indexes) + ano_start_indexes, ano_end_indexes, ano_count = self.get_index(ano_indexes) + + interval = int(self.size / self.base) + ano_len = 2 + + df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + + for i in range(nor_count): + + if nor_end_indexes[i] - nor_start_indexes[i] + 1 < interval: + temp_df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + + x = range(nor_start_indexes[i], nor_end_indexes[i] + 1) + xp = [nor_start_indexes[i], nor_end_indexes[i]] + fp = [data['value'][nor_start_indexes[i]], data['value'][nor_end_indexes[i]]] + z = np.interp(x, xp, fp) + + temp_df['ind'] = x + temp_df['value'] = z + temp_df['pre_label'] = 0 + df = 
pd.concat([df, temp_df]) + else: + last_start_x = -1 + start_xs = range(nor_start_indexes[i], nor_end_indexes[i] + 1, interval) + xp = [] + fp = [] + for start_x in start_xs: + if start_x + interval > nor_end_indexes[i]: + last_start_x = start_x + break + + xp.append(start_x) + xp.append(start_x + interval - 1) + + fp.append(data['value'][start_x]) + fp.append(data['value'][start_x + interval - 1]) + + x = range(nor_start_indexes[i], last_start_x) + z = np.interp(x, xp, fp) + + temp_df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + temp_df['ind'] = x + temp_df['value'] = z + temp_df['pre_label'] = 0 + df = pd.concat([df, temp_df]) + + if last_start_x != -1: + temp_df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + + x = range(last_start_x, nor_end_indexes[i] + 1) + xp = [last_start_x, nor_end_indexes[i]] + fp = [data['value'][last_start_x], data['value'][nor_end_indexes[i]]] + z = np.interp(x, xp, fp) + + temp_df['ind'] = x + temp_df['value'] = z + temp_df['pre_label'] = 0 + df = pd.concat([df, temp_df]) + + for i in range(ano_count): + temp_df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + + x = range(ano_start_indexes[i] - 1, ano_end_indexes[i] + 2) + xp = [ano_start_indexes[i] - 1, ano_end_indexes[i] + 1] + fp = [data['value'][ano_start_indexes[i] - 1], data['value'][ano_end_indexes[i] + 1]] + z = np.interp(x, xp, fp) + + for j in range(len(x)): + if j == 0 or j == len(x) - 1: + continue + + temp_df.loc[x[j], 'ind'] = x[j] + temp_df.loc[x[j], 'value'] = z[j] + temp_df.loc[x[j], 'pre_label'] = 1 + + df = pd.concat([df, temp_df]) + + df = df.set_index(['ind'], inplace=False).sort_index() + df['label'] = data['label'] + for i in range(self.row_num, data.shape[0]): + df = df._append(pd.Series(), ignore_index=True) + df.fillna(0, inplace=True) + return df + + def get_row_num(self): + return self.row_num + + def get_col_num(self): + return self.col_num diff --git 
a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__init__.py b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0eb77d1c825dcdeceddbc99eddf974a062750626 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__init__.py @@ -0,0 +1,10 @@ +import logging + +logger = logging.getLogger('base') + + +def create_model(opt): + from .model import DDPM as M + m = M(opt) + logger.info('Model [{:s}] is created.'.format(m.__class__.__name__)) + return m diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/__init__.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/__init__.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..082de7cfb60ff5dd414beac17a353dfd923d5957 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/__init__.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/__init__.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..78d1ba7353e61cd665d6a5c549a9e690b94854ec Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/__init__.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/__init__.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ab45257dcdcb10a5de99202a7f9699e0e3952e25 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/__init__.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/base_model.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/base_model.cpython-311.pyc new file mode 
100644 index 0000000000000000000000000000000000000000..a170668104bed75c2af54769631fedc794c476b6 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/base_model.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/base_model.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/base_model.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c487ba492eb115fb44ec4ebff51ce73c3eaa16a Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/base_model.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/base_model.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/base_model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3da8f605c28b8459254af43ffe831b1b9783811d Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/base_model.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/model.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/model.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eaaf2026989c9d60984dd033f4de1f75f61500ac Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/model.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/model.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/model.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a6d3feb52b05a954211ccca611f7a60e346cf07d Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/model.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/model.cpython-38.pyc 
b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5b417c9cb376fba19d0425af84e967504b2847c Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/model.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/networks.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/networks.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c8a0e3a599f235d2d50149e7cb4433ff164390c2 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/networks.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/networks.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/networks.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f54583dcd8ca45edf1ccee30b849a6bd8b1630a6 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/networks.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/networks.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/networks.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5b7f298038b9d6c90d587852ba00957106b20b2 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/__pycache__/networks.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/base_model.py b/subject1-4/dynamicSplit/02DiffAD-main_high/model/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..de1022eb58ad1e4b8f79553282403e843aa04609 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/model/base_model.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn + + +class BaseModel(): + def __init__(self, 
opt): + self.opt = opt + self.device = torch.device( + 'cuda' if opt['gpu_ids'] is not None else 'cpu') + self.begin_step = 0 + self.begin_epoch = 0 + + def feed_data(self, data): + pass + + def optimize_parameters(self): + pass + + def get_current_visuals(self): + pass + + def get_current_losses(self): + pass + + def print_network(self): + pass + + def set_device(self, x): + if isinstance(x, dict): + for key, item in x.items(): + if item is not None: + x[key] = item.to(self.device) + elif isinstance(x, list): + for item in x: + if item is not None: + item = item.to(self.device) + else: + x = x.to(self.device) + return x + + def get_network_description(self, network): + '''Get the string and total parameters of the network''' + if isinstance(network, nn.DataParallel): + network = network.module + s = str(network) + n = sum(map(lambda x: x.numel(), network.parameters())) + return s, n diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/model.py b/subject1-4/dynamicSplit/02DiffAD-main_high/model/model.py new file mode 100644 index 0000000000000000000000000000000000000000..d0a61048b7f681b5f40adf9663ac6da256febe1f --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/model/model.py @@ -0,0 +1,175 @@ +import logging +import os +from collections import OrderedDict + +import torch +import torch.nn as nn + +import model.networks as networks +from .base_model import BaseModel + +logger = logging.getLogger('base') + + +class DDPM(BaseModel): + def __init__(self, opt): + super(DDPM, self).__init__(opt) + netG = networks.define_G(opt) + + self.netG = self.set_device(netG) + self.schedule_phase = None + + # set loss and load resume state + self.set_loss() + self.set_new_noise_schedule( + opt['model']['beta_schedule']['train'], schedule_phase='train') + + if self.opt['phase'] == 'train': + self.netG.train() + if opt['model']['finetune_norm']: + optim_params = [] + for k, v in self.netG.named_parameters(): + v.requires_grad = False + if k.find('transformer') >= 
0: + v.requires_grad = True + v.data.zero_() + optim_params.append(v) + logger.info( + 'Params [{:s}] initialized to 0 and will optimize.'.format(k)) + else: + optim_params = list(self.netG.parameters()) + + self.optG = torch.optim.Adam( + optim_params, lr=opt['train']["optimizer"]["lr"]) + self.log_dict = OrderedDict() + self.load_network() + self.print_network() + + def feed_data(self, data): + self.data = self.set_device(data) + + def optimize_parameters(self): + self.optG.zero_grad() + l_pix = self.netG(self.data) + # need to average in multi-gpu + b, c, h, w = self.data['HR'].shape + l_pix = l_pix.sum() / int(b * c * h * w) + + l_pix.backward() + self.optG.step() + + # set log + self.log_dict['l_pix'] = l_pix.item() + + def test(self,connection,continous=False): + self.netG.eval() + + with torch.no_grad(): + ori = self.data['ORI'].squeeze() + min_num = ori.min().item() + max_num = ori.max().item() + if isinstance(self.netG, nn.DataParallel): + self.SR = self.netG.module.super_resolution( + self.data['SR'],connection, continous=continous, min_num=min_num, max_num=max_num) + else: + self.SR = self.netG.super_resolution( + self.data['SR'],connection, continous=continous, min_num=min_num, max_num=max_num) + self.netG.train() + + def sample(self, batch_size=1, continous=False): + self.netG.eval() + with torch.no_grad(): + if isinstance(self.netG, nn.DataParallel): + self.SR = self.netG.module.sample(batch_size, continous) + else: + self.SR = self.netG.sample(batch_size, continous) + self.netG.train() + + def set_loss(self): + if isinstance(self.netG, nn.DataParallel): + self.netG.module.set_loss(self.device) + else: + self.netG.set_loss(self.device) + + def set_new_noise_schedule(self, schedule_opt, schedule_phase='train'): + if self.schedule_phase is None or self.schedule_phase != schedule_phase: + self.schedule_phase = schedule_phase + if isinstance(self.netG, nn.DataParallel): + self.netG.module.set_new_noise_schedule( + schedule_opt, self.device) + else: + 
self.netG.set_new_noise_schedule(schedule_opt, self.device) + + def get_current_log(self): + return self.log_dict + + def get_current_visuals(self, need_LR=True, sample=False): + out_dict = OrderedDict() + if sample: + out_dict['SAM'] = self.SR.detach().float().cpu() + else: + out_dict['SR'] = self.SR.detach().float().cpu() + out_dict['INF'] = self.data['SR'].detach().float().cpu() + out_dict['ORI'] = self.data['ORI'].detach().float().cpu() + out_dict['HR'] = self.data['HR'].detach().float().cpu() + out_dict['label'] = self.data['label'].detach().cpu() + if need_LR and 'LR' in self.data: + out_dict['LR'] = self.data['LR'].detach().float().cpu() + else: + out_dict['LR'] = out_dict['INF'] + return out_dict + + def print_network(self): + s, n = self.get_network_description(self.netG) + if isinstance(self.netG, nn.DataParallel): + net_struc_str = '{} - {}'.format(self.netG.__class__.__name__, + self.netG.module.__class__.__name__) + else: + net_struc_str = '{}'.format(self.netG.__class__.__name__) + + logger.info( + 'Network G structure: {}, with parameters: {:,d}'.format(net_struc_str, n)) + logger.info(s) + + def save_network(self, epoch, iter_step): + gen_path = os.path.join( + self.opt['path']['checkpoint'], 'E{}_gen.pth'.format(epoch)) + opt_path = os.path.join( + self.opt['path']['checkpoint'], 'E{}_opt.pth'.format(epoch)) + # gen + network = self.netG + if isinstance(self.netG, nn.DataParallel): + network = network.module + state_dict = network.state_dict() + for key, param in state_dict.items(): + state_dict[key] = param.cpu() + torch.save(state_dict, gen_path) + # opt + opt_state = {'epoch': epoch, 'iter': iter_step, + 'scheduler': None, 'optimizer': None} + opt_state['optimizer'] = self.optG.state_dict() + torch.save(opt_state, opt_path) + + logger.info( + 'Saved model in [{:s}] ...'.format(gen_path)) + + def load_network(self): + load_path = self.opt['path']['resume_state'] + if load_path is not None: + logger.info( + 'Loading pretrained model for G [{:s}] 
...'.format(load_path)) + gen_path = '{}_gen.pth'.format(load_path) + opt_path = '{}_opt.pth'.format(load_path) + # gen + network = self.netG + if isinstance(self.netG, nn.DataParallel): + network = network.module + network.load_state_dict(torch.load( + gen_path), strict=(not self.opt['model']['finetune_norm'])) + + if self.opt['phase'] == 'train': + # optimizer + opt = torch.load(opt_path) + self.optG.load_state_dict(opt['optimizer']) + self.begin_step = opt['iter'] + self.begin_epoch = opt['epoch'] diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/networks.py b/subject1-4/dynamicSplit/02DiffAD-main_high/model/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..d2769b5477610f43aca87cf485e59c9c1c5e5313 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/model/networks.py @@ -0,0 +1,112 @@ +import functools +import logging + +import torch +import torch.nn as nn +from torch.nn import init + +logger = logging.getLogger('base') + + +# initialize +def weights_init_normal(m, std=0.02): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + init.normal_(m.weight.data, 0.0, std) + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('Linear') != -1: + init.normal_(m.weight.data, 0.0, std) + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('BatchNorm2d') != -1: + init.normal_(m.weight.data, 1.0, std) # BN also uses norm + init.constant_(m.bias.data, 0.0) + + +def weights_init_kaiming(m, scale=1): + classname = m.__class__.__name__ + if classname.find('Conv2d') != -1: + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('Linear') != -1: + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('BatchNorm2d') != -1: + init.constant_(m.weight.data, 1.0) + init.constant_(m.bias.data, 0.0) 
+ + +def weights_init_orthogonal(m): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + init.orthogonal_(m.weight.data, gain=1) + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('Linear') != -1: + init.orthogonal_(m.weight.data, gain=1) + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('BatchNorm2d') != -1: + init.constant_(m.weight.data, 1.0) + init.constant_(m.bias.data, 0.0) + + +def init_weights(net, init_type='kaiming', scale=1, std=0.02): + # scale for 'kaiming', std for 'normal'. + logger.info('Initialization method [{:s}]'.format(init_type)) + if init_type == 'normal': + weights_init_normal_ = functools.partial(weights_init_normal, std=std) + net.apply(weights_init_normal_) + elif init_type == 'kaiming': + weights_init_kaiming_ = functools.partial( + weights_init_kaiming, scale=scale) + net.apply(weights_init_kaiming_) + elif init_type == 'orthogonal': + net.apply(weights_init_orthogonal) + else: + raise NotImplementedError( + 'initialization method [{:s}] not implemented'.format(init_type)) + + +# define network +def define_G(opt): + model_opt = opt['model'] + if model_opt['which_model_G'] == 'sr3': + from .sr3_modules import diffusion, unet + + if ('norm_groups' not in model_opt['unet']) or model_opt['unet']['norm_groups'] is None: + model_opt['unet']['norm_groups'] = 32 + + model = unet.UNet( + in_channel=model_opt['unet']['in_channel'], + out_channel=model_opt['unet']['out_channel'], + norm_groups=model_opt['unet']['norm_groups'], + inner_channel=model_opt['unet']['inner_channel'], + channel_mults=model_opt['unet']['channel_multiplier'], + attn_res=model_opt['unet']['attn_res'], + res_blocks=model_opt['unet']['res_blocks'], + dropout=model_opt['unet']['dropout'], + time_size=model_opt['diffusion']['time_size'] + ) + + netG = diffusion.GaussianDiffusion( + model, + time_size=model_opt['diffusion']['time_size'], + channels=model_opt['diffusion']['channels'], + loss_type='l1', # L1 or L2 + 
conditional=model_opt['diffusion']['conditional'], + schedule_opt=model_opt['beta_schedule']['train'] + ) + if opt['phase'] == 'train': + init_weights(netG, init_type='orthogonal') + + if opt['gpu_ids'] and opt['distributed']: + assert torch.cuda.is_available() + netG = nn.DataParallel(netG) + + return netG diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/diffusion.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/diffusion.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..02efccf97b419249399eb1051362db2cdf2e5ab4 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/diffusion.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/diffusion.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/diffusion.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c07ac5f1fb3a4272820d36f3ec06b99f65f03bbf Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/diffusion.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/diffusion.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/diffusion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be4f35138c6051016821f8bea0216749cb0393f2 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/diffusion.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/unet.cpython-311.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/unet.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..238e09c47c02c50a1d25ac838b07d2e8110ed8ae Binary 
files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/unet.cpython-311.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/unet.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/unet.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69c86c07ff82dfdf6974e215ce6058f76e46913d Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/unet.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/unet.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/unet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..914d38468f8770184e50ff0233c2c5f5851c6297 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/__pycache__/unet.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/diffusion.py b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/diffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..0e0ec32e9e8098209b2e7017e548540644ce385f --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/diffusion.py @@ -0,0 +1,284 @@ +import math +from functools import partial +from inspect import isfunction + +import numpy as np +import torch +from torch import nn +from tqdm import tqdm +import socket +import time +import io + +def _warmup_beta(linear_start, linear_end, n_timestep, warmup_frac): + betas = linear_end * np.ones(n_timestep, dtype=np.float64) + warmup_time = int(n_timestep * warmup_frac) + betas[:warmup_time] = np.linspace( + linear_start, linear_end, warmup_time, dtype=np.float64) + return betas + + +def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if schedule == 
'quad': + betas = np.linspace(linear_start ** 0.5, linear_end ** 0.5, + n_timestep, dtype=np.float64) ** 2 + elif schedule == 'linear': + betas = np.linspace(linear_start, linear_end, + n_timestep, dtype=np.float64) + elif schedule == 'warmup10': + betas = _warmup_beta(linear_start, linear_end, + n_timestep, 0.1) + elif schedule == 'warmup50': + betas = _warmup_beta(linear_start, linear_end, + n_timestep, 0.5) + elif schedule == 'const': + betas = linear_end * np.ones(n_timestep, dtype=np.float64) + elif schedule == 'jsd': # 1/T, 1/(T-1), 1/(T-2), ..., 1 + betas = 1. / np.linspace(n_timestep, + 1, n_timestep, dtype=np.float64) + elif schedule == "cosine": + timesteps = ( + torch.arange(n_timestep + 1, dtype=torch.float64) / + n_timestep + cosine_s + ) + alphas = timesteps / (1 + cosine_s) * math.pi / 2 + alphas = torch.cos(alphas).pow(2) + alphas = alphas / alphas[0] + betas = 1 - alphas[1:] / alphas[:-1] + betas = betas.clamp(max=0.999) + else: + raise NotImplementedError(schedule) + return betas + + +# gaussian diffusion trainer class +def exists(x): + return x is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +class GaussianDiffusion(nn.Module): + def __init__(self, denoise_fn, time_size, channels=3, loss_type='l1', + conditional=True, schedule_opt=None): + + super().__init__() + self.channels = channels + self.time_size = time_size + self.denoise_fn = denoise_fn + self.loss_type = loss_type + self.conditional = conditional + if schedule_opt is not None: + pass + + def set_loss(self, device): + if self.loss_type == 'l1': + self.loss_func = nn.L1Loss(reduction='sum').to(device) + elif self.loss_type == 'l2': + self.loss_func = nn.MSELoss(reduction='sum').to(device) + else: + raise NotImplementedError() + + def set_new_noise_schedule(self, schedule_opt, device): + to_torch = partial(torch.tensor, dtype=torch.float32, device=device) + + betas = make_beta_schedule( + schedule=schedule_opt['schedule'], + 
n_timestep=schedule_opt['n_timestep'], + linear_start=schedule_opt['linear_start'], + linear_end=schedule_opt['linear_end']) + + betas = betas.detach().cpu().numpy() if isinstance( + betas, torch.Tensor) else betas + + alphas = 1. - betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + self.sqrt_alphas_cumprod_prev = np.sqrt( + np.append(1., alphas_cumprod)) + + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', + to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', + to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', + to_torch(np.sqrt(1. - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', + to_torch(np.log(1. - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', + to_torch(np.sqrt(1. / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', + to_torch(np.sqrt(1. / alphas_cumprod - 1))) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = betas * \ + (1. - alphas_cumprod_prev) / (1. - alphas_cumprod) + # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) + self.register_buffer('posterior_variance', + to_torch(posterior_variance)) + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + self.register_buffer('posterior_log_variance_clipped', to_torch( + np.log(np.maximum(posterior_variance, 1e-20)))) + self.register_buffer('posterior_mean_coef1', to_torch( + betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) + self.register_buffer('posterior_mean_coef2', to_torch( + (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. 
- alphas_cumprod))) + + def predict_start_from_noise(self, x_t, t, noise): + return self.sqrt_recip_alphas_cumprod[t] * x_t - \ + self.sqrt_recipm1_alphas_cumprod[t] * noise + + def q_posterior(self, x_start, x_t, t): + posterior_mean = self.posterior_mean_coef1[t] * \ + x_start + self.posterior_mean_coef2[t] * x_t + posterior_log_variance_clipped = self.posterior_log_variance_clipped[t] + return posterior_mean, posterior_log_variance_clipped + + def p_mean_variance(self, x, t, clip_denoised: bool, condition_x=None): + batch_size = x.shape[0] + noise_level = torch.FloatTensor( + [self.sqrt_alphas_cumprod_prev[t + 1]]).repeat(batch_size, 1).to(x.device) + if condition_x is not None: + x_temp = torch.cat([condition_x, x], dim=1) + noise = self.denoise_fn(x_temp, noise_level) + x_recon = self.predict_start_from_noise(x, t=t, noise=noise) + else: + x_recon = self.predict_start_from_noise( + x, t=t, noise=self.denoise_fn(x, noise_level)) + + if clip_denoised: + x_recon.clamp_(self.min_num, self.max_num) + + model_mean, posterior_log_variance = self.q_posterior( + x_start=x_recon, x_t=x, t=t) + return model_mean, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, t, clip_denoised=True, condition_x=None): + model_mean, model_log_variance = self.p_mean_variance( + x=x, t=t, clip_denoised=clip_denoised, condition_x=condition_x) + + noise = torch.randn_like(x) if t > 0 else torch.zeros_like(x) + return model_mean + noise * (0.5 * model_log_variance).exp() + + @torch.no_grad() + def p_sample_loop(self, x_in,connection, continous=False): + device = self.betas.device + q = 0 + + sample_inter = (1 | (self.num_timesteps // 10)) + + img = self.receive_tensor(connection) + x = self.receive_tensor(connection) + print("img.shape,x.shape---",img.shape, x.shape) + + if not self.conditional: + # shape = x_in + shape =x + print(shape)#kk + # img = torch.randn(shape, device=device) + ret_img = img + for i in tqdm(reversed(range(0, self.num_timesteps)), desc='sampling loop 
time step', + total=self.num_timesteps): + img = self.p_sample(img, i, clip_denoised=True) + if i % sample_inter == 0: + ret_img = torch.cat([ret_img, img], dim=0) + else: + # x = x_in + shape = x.shape + print(shape)#kk + # img = torch.randn(shape, device=device) + ret_img = x + + for i in tqdm(reversed(range(0, self.num_timesteps)), desc='sampling loop time step', + total=self.num_timesteps): + img = self.p_sample(img, i, condition_x=x, clip_denoised=True) + if i % sample_inter == 0: + ret_img = torch.cat([ret_img, img], dim=0) + + # print(ret_img.shape)#kk + # print(ret_img[-1].shape)#kk + + if continous: + return ret_img + else: + return ret_img[-1] + + @torch.no_grad() + def receive_tensor(self,socket): + # 首先读取数据长度 + data_length = int.from_bytes(socket.recv(4), byteorder='big') + # 然后根据数据长度读取数据 + data = b'' + while len(data) < data_length: + packet = socket.recv(data_length - len(data)) + if not packet: + return None + data += packet + buffer = io.BytesIO(data) + tensor = torch.load(buffer) + return tensor + + @torch.no_grad() + def sample(self, batch_size=1, continous=False): + time_size = self.time_size + channels = self.channels + return self.p_sample_loop((batch_size, channels, time_size, time_size), continous) + + @torch.no_grad() + def super_resolution(self, x_in,connection, min_num, max_num, continous=False): + self.min_num = min_num + self.max_num = max_num + return self.p_sample_loop(x_in,connection, continous) + + def q_sample(self, x_start, continuous_sqrt_alpha_cumprod, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + + # random gama + return ( + continuous_sqrt_alpha_cumprod * x_start + + (1 - continuous_sqrt_alpha_cumprod ** 2).sqrt() * noise + ) + + def p_losses(self, x_in, noise=None): + x_start = x_in['HR'] + [b, c, h, w] = x_start.shape + t = np.random.randint(1, self.num_timesteps + 1) + + continuous_sqrt_alpha_cumprod = torch.FloatTensor( + np.random.uniform( + self.sqrt_alphas_cumprod_prev[t - 1], + 
self.sqrt_alphas_cumprod_prev[t], + size=b + ) + ).to(x_start.device) + + continuous_sqrt_alpha_cumprod = continuous_sqrt_alpha_cumprod.view(b, -1) + + noise = default(noise, lambda: torch.randn_like(x_start)) + + x_noisy = self.q_sample( + x_start=x_start, continuous_sqrt_alpha_cumprod=continuous_sqrt_alpha_cumprod.view(-1, 1, 1, 1), noise=noise) + + if not self.conditional: + x_recon = self.denoise_fn(x_noisy, continuous_sqrt_alpha_cumprod) + else: + x_cat = torch.cat([x_in['SR'], x_noisy], dim=1) + x_recon = self.denoise_fn(x_cat, continuous_sqrt_alpha_cumprod) + + loss = self.loss_func(noise, x_recon) + return loss + + def forward(self, x, *args, **kwargs): + return self.p_losses(x, *args, **kwargs) diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/unet.py b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..77159a7aa693782fe91e10ce6d3083fbc5947bf0 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/model/sr3_modules/unet.py @@ -0,0 +1,274 @@ +import math +from inspect import isfunction + +import torch +from torch import nn + + +def exists(x): + return x is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +class PositionalEncoding(nn.Module): + def __init__(self, dim): + super().__init__() + self.dim = dim + + def forward(self, noise_level): + count = self.dim // 2 + step = torch.arange(count, dtype=noise_level.dtype, + device=noise_level.device) / count + encoding = noise_level.unsqueeze( + 1) * torch.exp(-math.log(1e4) * step.unsqueeze(0)) + encoding = torch.cat( + [torch.sin(encoding), torch.cos(encoding)], dim=-1) + return encoding + + +class FeatureWiseAffine(nn.Module): + def __init__(self, in_channels, out_channels, use_affine_level=False): + super(FeatureWiseAffine, self).__init__() + self.use_affine_level = use_affine_level + self.noise_func = nn.Sequential( + 
nn.Linear(in_channels, out_channels * (1 + self.use_affine_level)) + ) + + def forward(self, x, noise_embed): + batch = x.shape[0] + if self.use_affine_level: + gamma, beta = self.noise_func(noise_embed).view( + batch, -1, 1, 1).chunk(3, dim=1) + x = (1 + gamma) * x + beta + else: + x = x + self.noise_func(noise_embed).view(batch, -1, 1, 1) + return x + + +class Swish(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + +class Upsample(nn.Module): + def __init__(self, dim): + super().__init__() + self.up = nn.Upsample(scale_factor=2, mode="nearest") + self.conv = nn.Conv2d(dim, dim, 3, padding=1) + + def forward(self, x): + return self.conv(self.up(x)) + + +class Downsample(nn.Module): + def __init__(self, dim): + super().__init__() + self.conv = nn.Conv2d(dim, dim, 3, 2, 1) + + def forward(self, x): + return self.conv(x) + + +# building block modules +class Block(nn.Module): + def __init__(self, dim, dim_out, groups=32, dropout=0): + super().__init__() + self.block = nn.Sequential( + nn.GroupNorm(groups, dim), + Swish(), + nn.Dropout(dropout) if dropout != 0 else nn.Identity(), + nn.Conv2d(dim, dim_out, 3, padding=1) + ) + + def forward(self, x): + return self.block(x) + + +class ResnetBlock(nn.Module): + def __init__(self, dim, dim_out, noise_level_emb_dim=None, dropout=0, use_affine_level=False, norm_groups=32): + super().__init__() + self.noise_func = FeatureWiseAffine( + noise_level_emb_dim, dim_out, use_affine_level) + + self.block1 = Block(dim, dim_out, groups=norm_groups) + self.block2 = Block(dim_out, dim_out, groups=norm_groups, dropout=dropout) + self.res_conv = nn.Conv2d( + dim, dim_out, 1) if dim != dim_out else nn.Identity() + + def forward(self, x, time_emb): + b, c, h, w = x.shape + h = self.block1(x) + h = self.noise_func(h, time_emb) + h = self.block2(h) + return h + self.res_conv(x) + + +class SelfAttention(nn.Module): + def __init__(self, in_channel, n_head=1, norm_groups=32): + super().__init__() + + self.n_head = n_head + + 
self.norm = nn.GroupNorm(norm_groups, in_channel) + self.qkv = nn.Conv2d(in_channel, in_channel * 3, 1, bias=False) + self.out = nn.Conv2d(in_channel, in_channel, 1) + + def forward(self, input): + batch, channel, height, width = input.shape + n_head = self.n_head + head_dim = channel // n_head + + norm = self.norm(input) + qkv = self.qkv(norm).view(batch, n_head, head_dim * 3, height, width) + query, key, value = qkv.chunk(3, dim=2) # bhdyx + + attn = torch.einsum( + "bnchw, bncyx -> bnhwyx", query, key + ).contiguous() / math.sqrt(channel) + attn = attn.view(batch, n_head, height, width, -1) + attn = torch.softmax(attn, -1) + attn = attn.view(batch, n_head, height, width, height, width) + + out = torch.einsum("bnhwyx, bncyx -> bnchw", attn, value).contiguous() + out = self.out(out.view(batch, channel, height, width)) + + return out + input + + +class ResnetBlocWithAttn(nn.Module): + def __init__(self, dim, dim_out, *, noise_level_emb_dim=None, norm_groups=32, dropout=0, with_attn=False): + super().__init__() + self.with_attn = with_attn + self.res_block = ResnetBlock( + dim, dim_out, noise_level_emb_dim, norm_groups=norm_groups, dropout=dropout) + if with_attn: + self.attn = SelfAttention(dim_out, norm_groups=norm_groups) + + def forward(self, x, time_emb): + x = self.res_block(x, time_emb) + if (self.with_attn): + x = self.attn(x) + return x + + +class UNet(nn.Module): + def __init__( + self, + in_channel=6, + out_channel=3, + inner_channel=32, + norm_groups=32, + channel_mults=(1, 2, 4, 8, 8), + attn_res=(8), + res_blocks=3, + dropout=0, + with_noise_level_emb=True, + time_size=128 + ): + super().__init__() + + if with_noise_level_emb: + noise_level_channel = inner_channel + self.noise_level_mlp = nn.Sequential( + PositionalEncoding(inner_channel), + nn.Linear(inner_channel, inner_channel * 4), + Swish(), + nn.Linear(inner_channel * 4, inner_channel) + ) + else: + noise_level_channel = None + self.noise_level_mlp = None + + num_mults = len(channel_mults) + 
pre_channel = inner_channel + feat_channels = [pre_channel] + now_res = time_size + downs = [nn.Conv2d(in_channel, inner_channel, + kernel_size=3, padding=1)] + + for ind in range(num_mults): + is_last = (ind == num_mults - 1) + use_attn = (now_res in attn_res) + channel_mult = inner_channel * channel_mults[ind] + + for _ in range(0, res_blocks): + downs.append(ResnetBlocWithAttn( + pre_channel, channel_mult, noise_level_emb_dim=noise_level_channel, norm_groups=norm_groups, + dropout=dropout, with_attn=use_attn)) + feat_channels.append(channel_mult) + pre_channel = channel_mult + if not is_last: + downs.append(Downsample(pre_channel)) + feat_channels.append(pre_channel) + now_res = now_res // 2 + self.downs = nn.ModuleList(downs) + + self.mid = nn.ModuleList([ + ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel, + norm_groups=norm_groups, + dropout=dropout, with_attn=True), + ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel, + norm_groups=norm_groups, + dropout=dropout, with_attn=False) + ]) + + ups = [] + for ind in reversed(range(num_mults)): + is_last = (ind < 1) + use_attn = (now_res in attn_res) + channel_mult = inner_channel * channel_mults[ind] + + for _ in range(0, res_blocks + 1): + ups.append(ResnetBlocWithAttn( + pre_channel + feat_channels.pop(), channel_mult, noise_level_emb_dim=noise_level_channel, + norm_groups=norm_groups, + dropout=dropout, with_attn=use_attn)) + pre_channel = channel_mult + if not is_last: + ups.append(Upsample(pre_channel)) + now_res = now_res * 2 + + self.ups = nn.ModuleList(ups) + self.final_conv = Block(pre_channel, default(out_channel, in_channel), groups=norm_groups) + + def forward(self, x, time): + t = self.noise_level_mlp(time) if exists( + self.noise_level_mlp) else None + + feats = [] + for layer in self.downs: + if isinstance(layer, ResnetBlocWithAttn): + x = layer(x, t) + else: + x = layer(x) + feats.append(x) + + for layer in self.mid: + if 
isinstance(layer, ResnetBlocWithAttn): + x = layer(x, t) + else: + x = layer(x) + + for layer in self.ups: + if isinstance(layer, ResnetBlocWithAttn): + pop_temp = feats.pop() + x_temp = torch.cat((x, pop_temp), dim=1) + x = layer(x_temp, t) + else: + x = layer(x) + + return self.final_conv(x) +#模型组件 +#PositionalEncoding: 位置编码器,用于编码噪声级别。 +#FeatureWiseAffine: 用于特征级别的仿射变换。 +#Swish: Swish 激活函数。 +#Upsample: 上采样模块。 +#Downsample: 下采样模块。 +#Block: 基本的卷积块。 +#ResnetBlock: ResNet 风格的残差块。 +#SelfAttention: 自注意力机制模块,用于提取图像特征中的重要信息。 \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/requirements.txt b/subject1-4/dynamicSplit/02DiffAD-main_high/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..918c9c0a31edb7a74cdce8cced4814813dc67ff5 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/requirements.txt @@ -0,0 +1,7 @@ +torch>=1.12 +torchvision +numpy~=1.23.2 +pandas~=1.5.1 +scikit-learn~=1.1.2 +tqdm~=4.64.1 +tensorboardx~=2.5.1 \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/server2.py b/subject1-4/dynamicSplit/02DiffAD-main_high/server2.py new file mode 100644 index 0000000000000000000000000000000000000000..bc031d5a165f94e7edbb9c3f967e8e980966ca1e --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/server2.py @@ -0,0 +1,44 @@ +import socket +import time +import io +import torch +def receive_tensor(socket): + # 首先读取数据长度 + data_length = int.from_bytes(socket.recv(4), byteorder='big') + # 然后根据数据长度读取数据 + data = b'' + while len(data) < data_length: + packet = socket.recv(data_length - len(data)) + if not packet: + return None + data += packet + buffer = io.BytesIO(data) + tensor = torch.load(buffer) + return tensor + +def main(): + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_address = ('', 9999) # 空字符串表示监听所有可用的接口 + server_socket.bind(server_address) + server_socket.listen(1) + + print("等待客户端连接...") + connection, client_address = 
server_socket.accept() + print(f"客户端 {client_address} 已连接") + + while True: + # data = client_connection.recv(4) + # if len(data) == 4: + # received_int = int.from_bytes(data, byteorder='big') + # print("---------",received_int) + # else: + # print("Received incomplete data.") + img = receive_tensor(connection) + x = receive_tensor(connection) + print("img.shape,x.shape---",img.shape, x.shape) + + connection.close() + server_socket.close() + +if __name__ == "__main__": + main() diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/test_split.py b/subject1-4/dynamicSplit/02DiffAD-main_high/test_split.py new file mode 100644 index 0000000000000000000000000000000000000000..bf821c36365c39191464c3c98d58034a031d6794 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/test_split.py @@ -0,0 +1,163 @@ +import argparse +import logging +import os +import time + +import pandas as pd +import torch +from tensorboardX import SummaryWriter + +import core.logger as Logger +import core.metrics as Metrics +import data as Data +import model as Model +from decimal import Decimal +import sys + +import socket +import time +import io + +def time_test(params, strategy_params, temp_list): + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + opt = params['opt'] + logger = params['logger'] + logger_test = params['logger_test'] + model_epoch = params['model_epoch'] + + diffusion = Model.create_model(opt) + logger.info('Initial Model Finished') + + current_step = diffusion.begin_step + current_epoch = diffusion.begin_epoch + + if opt['path']['resume_state']: + logger.info('Resuming training from epoch: {}, iter: {}.'.format( + current_epoch, current_step)) + + diffusion.set_new_noise_schedule( + opt['model']['beta_schedule'][opt['phase']], schedule_phase=opt['phase']) + + logger.info('Begin Model Evaluation.') + idx = 0 + + all_datas = pd.DataFrame() + sr_datas = pd.DataFrame() + differ_datas = pd.DataFrame() + + result_path = 
'{}'.format(opt['path']['results']) + os.makedirs(result_path, exist_ok=True) + + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#news + + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_address = ('', 9999) # 空字符串表示监听所有可用的接口 + server_socket.bind(server_address) + server_socket.listen(1) + + print("等待客户端连接...") + connection, client_address = server_socket.accept() + print(f"客户端 {client_address} 已连接") + + for _, test_data in enumerate(test_loader): + # print(test_data['ORI'].shape) + # print(test_data['HR'].shape) + # print(test_data['SR'].shape) + # print(test_data['label'].shape) + # sys.exit (0) + idx += 1 + diffusion.feed_data(test_data) + diffusion.test(connection,continous=False) + visuals = diffusion.get_current_visuals() + + all_data, sr_df, differ_df = Metrics.tensor2allcsv(visuals, params['col_num']) + all_datas = Metrics.merge_all_csv(all_datas, all_data) + sr_datas = Metrics.merge_all_csv(sr_datas, sr_df) + differ_datas = Metrics.merge_all_csv(differ_datas, differ_df) + + # print(idx) + # sys.exit (0) + connection.close() + server_socket.close() + + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#new + all_datas = all_datas.reset_index(drop=True) + sr_datas = sr_datas.reset_index(drop=True) + differ_datas = differ_datas.reset_index(drop=True) + + for i in range(params['row_num'], all_datas.shape[0]): + all_datas.drop(index=[i], inplace=True) + sr_datas.drop(index=[i], inplace=True) + differ_datas.drop(index=[i], inplace=True) + + f1,accuracy,precision,recall = Metrics.relabeling_strategy(all_datas, strategy_params) + + temp_f1 = Decimal(f1).quantize(Decimal("0.0000")) + temp_acc = Decimal(accuracy).quantize(Decimal("0.0000")) + temp_prec = Decimal(precision).quantize(Decimal("0.0000")) + temp_rec = Decimal(recall).quantize(Decimal("0.0000")) + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#new + print('F1-score: ', float(temp_f1)) + print('precision: ', 
float(temp_prec)) + print('accuracy: ', float(temp_acc)) + print('recall: ', float(temp_rec)) + + +# evaluate model performance +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config', type=str, default='//02DiffAD-main/config/smd_time_test.json', + help='JSON file for configuration') + parser.add_argument('-p', '--phase', type=str, choices=['train ', 'val', 'test'], + help='Run either train(training) or val(generation)', default='test') + parser.add_argument('-gpu', '--gpu_ids', type=str, default=None) + parser.add_argument('-debug', '-d', action='store_true') + parser.add_argument('-enable_wandb', action='store_true') + parser.add_argument('-log_wandb_ckpt', action='store_true') + parser.add_argument('-log_eval', action='store_true') + + temp_list = [] + model_epoch = 100 + + # parse configs + args = parser.parse_args() + opt = Logger.parse(args, model_epoch) + # Convert to NoneDict, which return None for missing key. + opt = Logger.dict_to_nonedict(opt) + logger_name = 'test' + str(model_epoch) + # logging + Logger.setup_logger(logger_name, opt['path']['log'], 'test', level=logging.INFO) + logger = logging.getLogger('base') + logger.info(Logger.dict2str(opt)) + tb_logger = SummaryWriter(log_dir=opt['path']['tb_logger']) + + #开始测试 + test_set = Data.create_dataset(opt['datasets']['test'], 'test') + + test_loader = Data.create_dataloader(test_set, opt['datasets']['test'], 'test') + logger.info('Initial Dataset Finished') + logger_test = logging.getLogger(logger_name) # test logger + + start_label = opt['model']['beta_schedule']['test']['start_label'] + end_label = opt['model']['beta_schedule']['test']['end_label'] + step_label = opt['model']['beta_schedule']['test']['step_label'] + step_t = opt['model']['beta_schedule']['test']['step_t'] + strategy_params = { + 'start_label': start_label, + 'end_label': end_label, + 'step_label': step_label, + 'step_t': step_t + } + + params = { + 'opt': opt, + 'logger': logger, + 
'logger_test': logger_test, + 'model_epoch': model_epoch, + 'row_num': test_set.row_num, + 'col_num': test_set.col_num + } + + time_test(params, strategy_params, temp_list) + logging.shutdown() diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/time_test.py b/subject1-4/dynamicSplit/02DiffAD-main_high/time_test.py new file mode 100644 index 0000000000000000000000000000000000000000..c4f90530e886721500c7f9e4e19184adee962a3b --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/time_test.py @@ -0,0 +1,133 @@ +import argparse +import logging +import os +import time + +import pandas as pd +import torch +from tensorboardX import SummaryWriter + +import core.logger as Logger +import core.metrics as Metrics +import data as Data +import model as Model +from decimal import Decimal + + +def time_test(params, strategy_params, temp_list): + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + opt = params['opt'] + logger = params['logger'] + logger_test = params['logger_test'] + model_epoch = params['model_epoch'] + + diffusion = Model.create_model(opt) + logger.info('Initial Model Finished') + + current_step = diffusion.begin_step + current_epoch = diffusion.begin_epoch + + if opt['path']['resume_state']: + logger.info('Resuming training from epoch: {}, iter: {}.'.format( + current_epoch, current_step)) + + diffusion.set_new_noise_schedule( + opt['model']['beta_schedule'][opt['phase']], schedule_phase=opt['phase']) + + logger.info('Begin Model Evaluation.') + idx = 0 + + all_datas = pd.DataFrame() + sr_datas = pd.DataFrame() + differ_datas = pd.DataFrame() + + result_path = '{}'.format(opt['path']['results']) + os.makedirs(result_path, exist_ok=True) + + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#news + for _, test_data in enumerate(test_loader): + idx += 1 + diffusion.feed_data(test_data) + diffusion.test(continous=False) + visuals = diffusion.get_current_visuals() + + all_data, sr_df, differ_df = 
Metrics.tensor2allcsv(visuals, params['col_num']) + all_datas = Metrics.merge_all_csv(all_datas, all_data) + sr_datas = Metrics.merge_all_csv(sr_datas, sr_df) + differ_datas = Metrics.merge_all_csv(differ_datas, differ_df) + + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#news + all_datas = all_datas.reset_index(drop=True) + sr_datas = sr_datas.reset_index(drop=True) + differ_datas = differ_datas.reset_index(drop=True) + + for i in range(params['row_num'], all_datas.shape[0]): + all_datas.drop(index=[i], inplace=True) + sr_datas.drop(index=[i], inplace=True) + differ_datas.drop(index=[i], inplace=True) + + f1 = Metrics.relabeling_strategy(all_datas, strategy_params) + temp_f1 = Decimal(f1) + # temp_f1 = Decimal(f1).quantize(Decimal("0.0000")) + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#new + print('F1-score: ', float(temp_f1)) + + +# evaluate model performance +if __name__ == '__main__': + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#news + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config', type=str, default='//02DiffAD-main/config/msl_time_test.json', + help='JSON file for configuration') + parser.add_argument('-p', '--phase', type=str, choices=['train ', 'val', 'test'], + help='Run either train(training) or val(generation)', default='test') + parser.add_argument('-gpu', '--gpu_ids', type=str, default=None) + parser.add_argument('-debug', '-d', action='store_true') + parser.add_argument('-enable_wandb', action='store_true') + parser.add_argument('-log_wandb_ckpt', action='store_true') + parser.add_argument('-log_eval', action='store_true') + + temp_list = [] + model_epoch = 100 + + # parse configs + args = parser.parse_args() + opt = Logger.parse(args, model_epoch) + # Convert to NoneDict, which return None for missing key. 
+ opt = Logger.dict_to_nonedict(opt) + logger_name = 'test' + str(model_epoch) + # logging + Logger.setup_logger(logger_name, opt['path']['log'], 'test', level=logging.INFO) + logger = logging.getLogger('base') + logger.info(Logger.dict2str(opt)) + tb_logger = SummaryWriter(log_dir=opt['path']['tb_logger']) + + test_set = Data.create_dataset(opt['datasets']['test'], 'test') + + test_loader = Data.create_dataloader(test_set, opt['datasets']['test'], 'test') + logger.info('Initial Dataset Finished') + logger_test = logging.getLogger(logger_name) # test logger + + start_label = opt['model']['beta_schedule']['test']['start_label'] + end_label = opt['model']['beta_schedule']['test']['end_label'] + step_label = opt['model']['beta_schedule']['test']['step_label'] + step_t = opt['model']['beta_schedule']['test']['step_t'] + strategy_params = { + 'start_label': start_label, + 'end_label': end_label, + 'step_label': step_label, + 'step_t': step_t + } + + params = { + 'opt': opt, + 'logger': logger, + 'logger_test': logger_test, + 'model_epoch': model_epoch, + 'row_num': test_set.row_num, + 'col_num': test_set.col_num + } + + time_test(params, strategy_params, temp_list) + logging.shutdown() diff --git a/subject1-4/dynamicSplit/02DiffAD-main_high/time_train.py b/subject1-4/dynamicSplit/02DiffAD-main_high/time_train.py new file mode 100644 index 0000000000000000000000000000000000000000..a1f87e3eb4862cbc7f7a5fea1744280a00aa1d69 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_high/time_train.py @@ -0,0 +1,87 @@ +import argparse +import logging +import math + +import torch +from tensorboardX import SummaryWriter + +import core.logger as Logger +import data as Data +import model as Model + +# train model +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config', type=str, default='//02DiffAD-main/config/smd_time_train.json', + help='JSON file for configuration') + parser.add_argument('-p', '--phase', type=str, 
choices=['train', 'val'], + help='Run either train(training) or val(generation)', default='train') + parser.add_argument('-gpu', '--gpu_ids', type=str, default=None) + parser.add_argument('-debug', '-d', action='store_true') + parser.add_argument('-enable_wandb', action='store_true') + parser.add_argument('-log_wandb_ckpt', action='store_true') + parser.add_argument('-log_eval', action='store_true') + + # parse configs + args = parser.parse_args() + opt = Logger.parse(args) + # Convert to NoneDict, which return None for missing key. + opt = Logger.dict_to_nonedict(opt) + + # logging + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + + Logger.setup_logger(None, opt['path']['log'], + 'train', level=logging.INFO, screen=True) + Logger.setup_logger('val', opt['path']['log'], 'val', level=logging.INFO) + logger = logging.getLogger('base') + logger.info(Logger.dict2str(opt)) + tb_logger = SummaryWriter(log_dir=opt['path']['tb_logger']) + + for phase, dataset_opt in opt['datasets'].items(): + if phase == 'train' and args.phase != 'val': + train_set = Data.create_dataset(dataset_opt, phase) + train_loader = Data.create_dataloader(train_set, dataset_opt, phase) + + logger.info('Initial Dataset Finished') + + diffusion = Model.create_model(opt) + logger.info('Initial Model Finished') + + current_step = diffusion.begin_step + current_epoch = diffusion.begin_epoch + n_epoch = opt['train']['n_epoch'] + + if opt['path']['resume_state']: + logger.info('Resuming training from epoch: {}, iter: {}.'.format( + current_epoch, current_step)) + + diffusion.set_new_noise_schedule( + opt['model']['beta_schedule'][opt['phase']], schedule_phase=opt['phase']) + + save_model_iter = math.ceil(train_set.__len__() / opt['datasets']['train']['batch_size']) + while current_epoch < n_epoch: + current_epoch += 1 + for _, train_data in enumerate(train_loader): + current_step += 1 + if current_epoch > n_epoch: + break + diffusion.feed_data(train_data) + 
diffusion.optimize_parameters() + # log + if current_epoch % opt['train']['print_freq'] == 0 and current_step % save_model_iter == 0: + logs = diffusion.get_current_log() + message = ' '.format( + current_epoch, current_step) + for k, v in logs.items(): + message += '{:s}: {:.4e} '.format(k, v) + tb_logger.add_scalar(k, v, current_step) + logger.info(message) + + # save model + if current_epoch % opt['train']['save_checkpoint_freq'] == 0 and current_step % save_model_iter == 0: + logger.info('Saving models and training states.') + diffusion.save_network(current_epoch, current_step) + + logger.info('End of training.') diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/client2.py b/subject1-4/dynamicSplit/02DiffAD-main_low/client2.py new file mode 100644 index 0000000000000000000000000000000000000000..10704c093473035cd18001ca77f881d63b2fd83d --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/client2.py @@ -0,0 +1,29 @@ + +import socket +import random +import string +import time +import schedule + + +def establish_connection(ip='100.64.0.28',port=9999): + + + +def sent_data(): + data_size_bytes = 1 * 1024 * 1024 # 发送 1-10MB 的数据 + data = #generate_random_data(data_size_bytes) + print("开始发送数据...") + start_time = time.time() + + client_socket.sendall(data) + end_time = time.time() + elapsed_time = end_time - start_time + + print("数据发送完成") + + client_socket.close() + +if __name__ == "__main__": + + diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/config/msl_time_test.json b/subject1-4/dynamicSplit/02DiffAD-main_low/config/msl_time_test.json new file mode 100644 index 0000000000000000000000000000000000000000..bb9a3a7cb5db625fe06e0a4d3cc21576345eb283 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/config/msl_time_test.json @@ -0,0 +1,71 @@ +{ + "name": "MSL_TEST", + "phase": "test", + "gpu_ids": [ + 0 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": 
"//experiments/MSL_TRAIN_16_128_20/checkpoint/E100" + }, + "datasets": { + "test": { + "name": "msl_test", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/msl/msl_test.csv", + "datatype": "time", + "l_resolution": 16, + "r_resolution": 128, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + }, + "test": { + "schedule": "linear", + "start_label": 1, + "end_label": 2001, + "step_label": 1, + "step_t": 1000, + "n_timestep": 100, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 128, + "channels": 1, + "conditional": true + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/config/msl_time_train.json b/subject1-4/dynamicSplit/02DiffAD-main_low/config/msl_time_train.json new file mode 100644 index 0000000000000000000000000000000000000000..8440faa95a4a4e4ff00fc9a39b5b13e4c845eaf6 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/config/msl_time_train.json @@ -0,0 +1,80 @@ +{ + "name": "MSL_TRAIN", + "phase": "train", + "gpu_ids": [ + 1, + 3 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": null + }, + "datasets": { + "train": { + "name": "msl_train", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/msl/msl_train.csv", + "datatype": "time", + "l_resolution": 16, + "r_resolution": 128, + "batch_size": 32, + "num_workers": 4, + "use_shuffle": false, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + 
"out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 128, + "channels": 1, + "conditional": true + } + }, + "train": { + "n_epoch": 100, + "val_freq": 100, + "save_checkpoint_freq": 100, + "print_freq": 10, + "optimizer": { + "type": "adam", + "lr": 3e-6 + }, + "ema_scheduler": { + "step_start_ema": 5000, + "update_ema_every": 1, + "ema_decay": 0.9999 + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/config/psm_time_test.json b/subject1-4/dynamicSplit/02DiffAD-main_low/config/psm_time_test.json new file mode 100644 index 0000000000000000000000000000000000000000..a78f9daf724cf1e414115df89a1f06b4160272ea --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/config/psm_time_test.json @@ -0,0 +1,71 @@ +{ + "name": "PSM_TEST", + "phase": "test", + "gpu_ids": [ + 0 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": "experiments/PSM_TRAIN_16_128_100/checkpoint/E20" + }, + "datasets": { + "test": { + "name": "psm_test", + "mode": "HR", + "dataroot": "tf_dataset/psm/psm_test.csv", + "datatype": "time", + "l_resolution": 16, + "r_resolution": 128, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 100, + "linear_start": 1e-6, + "linear_end": 1e-2 + }, + "test": { + "schedule": "linear", + 
"start_label": 1, + "end_label": 2001, + "step_label": 1, + "step_t": 1000, + "n_timestep": 100, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 128, + "channels": 1, + "conditional": true + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/config/psm_time_train.json b/subject1-4/dynamicSplit/02DiffAD-main_low/config/psm_time_train.json new file mode 100644 index 0000000000000000000000000000000000000000..c9cd489fd11c86ae208fddd76de65ede3aaecacc --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/config/psm_time_train.json @@ -0,0 +1,80 @@ +{ + "name": "PSM_TRAIN", + "phase": "train", + "gpu_ids": [ + 0, + 1 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": null + }, + "datasets": { + "train": { + "name": "psm_train", + "mode": "HR", + "dataroot": "tf_dataset/psm/psm_train.csv", + "datatype": "time", + "l_resolution": 16, + "r_resolution": 128, + "batch_size": 32, + "num_workers": 4, + "use_shuffle": false, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 100, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 128, + "channels": 1, + "conditional": true + } + }, + "train": { + "n_epoch": 100, + "val_freq": 100, + "save_checkpoint_freq": 10, + "print_freq": 10, + "optimizer": { + "type": "adam", + "lr": 3e-6 + }, + "ema_scheduler": { + "step_start_ema": 5000, + "update_ema_every": 1, + "ema_decay": 0.9999 + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file 
diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/config/smap_time_test.json b/subject1-4/dynamicSplit/02DiffAD-main_low/config/smap_time_test.json new file mode 100644 index 0000000000000000000000000000000000000000..c73f7135dddd2ded804a915ee34a58d7c8698f84 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/config/smap_time_test.json @@ -0,0 +1,71 @@ +{ + "name": "SMAP_TEST", + "phase": "test", + "gpu_ids": [ + 0 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": "//experiments/SMAP_TRAIN_128_2048_20/checkpoint/E100" + }, + "datasets": { + "test": { + "name": "smap_test", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/smap/smap_test.csv", + "datatype": "time", + "l_resolution": 128, + "r_resolution": 2048, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + }, + "test": { + "schedule": "linear", + "start_label": 1, + "end_label": 3001, + "step_label": 1, + "step_t": 1000, + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 2048, + "channels": 1, + "conditional": true + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/config/smap_time_train.json b/subject1-4/dynamicSplit/02DiffAD-main_low/config/smap_time_train.json new file mode 100644 index 0000000000000000000000000000000000000000..d5d19094abb82daad489a68aee140b9919f832f4 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/config/smap_time_train.json @@ -0,0 +1,80 @@ +{ + "name": 
"SMAP_TRAIN", + "phase": "train", + "gpu_ids": [ + 1, + 3 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": null + }, + "datasets": { + "train": { + "name": "smap_train", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/smap/smap_train.csv", + "datatype": "time", + "l_resolution": 128, + "r_resolution": 2048, + "batch_size": 32, + "num_workers": 4, + "use_shuffle": false, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 2048, + "channels": 1, + "conditional": true + } + }, + "train": { + "n_epoch": 100, + "val_freq": 100, + "save_checkpoint_freq": 10, + "print_freq": 10, + "optimizer": { + "type": "adam", + "lr": 3e-6 + }, + "ema_scheduler": { + "step_start_ema": 5000, + "update_ema_every": 1, + "ema_decay": 0.9999 + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/config/smd_time_test.json b/subject1-4/dynamicSplit/02DiffAD-main_low/config/smd_time_test.json new file mode 100644 index 0000000000000000000000000000000000000000..ed8fc90eda21677a83709f755c848e9880477bb8 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/config/smd_time_test.json @@ -0,0 +1,71 @@ +{ + "name": "SMD_TEST", + "phase": "test", + "gpu_ids": [ + 0 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": "./experiments/SMD_TRAIN_128_2048_20/checkpoint/E100" + }, + "datasets": { + "test": { + "name": "smd_test", + 
"mode": "HR", + "dataroot": "./tf_dataset/smd/smd_test.csv", + "datatype": "time", + "l_resolution": 128, + "r_resolution": 2048, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + "attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + }, + "test": { + "schedule": "linear", + "start_label": 1, + "end_label": 1001, + "step_label": 1, + "step_t": 1000, + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 2048, + "channels": 1, + "conditional": true + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/config/smd_time_train.json b/subject1-4/dynamicSplit/02DiffAD-main_low/config/smd_time_train.json new file mode 100644 index 0000000000000000000000000000000000000000..12e176f8e41c58c944f533031a80e70c84959a04 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/config/smd_time_train.json @@ -0,0 +1,80 @@ +{ + "name": "SMD_TRAIN", + "phase": "train", + "gpu_ids": [ + 0, + 1 + ], + "path": { + "log": "logs", + "tb_logger": "tb_logger", + "results": "results", + "checkpoint": "checkpoint", + "resume_state": null + }, + "datasets": { + "train": { + "name": "smd_train", + "mode": "HR", + "dataroot": "//02DiffAD-main/tf_dataset/smd/smd_train.csv", + "datatype": "time", + "l_resolution": 128, + "r_resolution": 2048, + "batch_size": 8, + "num_workers": 4, + "use_shuffle": false, + "data_len": -1 + } + }, + "model": { + "which_model_G": "sr3", + "finetune_norm": false, + "unet": { + "in_channel": 2, + "out_channel": 1, + "inner_channel": 32, + "norm_groups": 16, + "channel_multiplier": [ + 1, + 2, + 4, + 8, + 16 + ], + 
"attn_res": [], + "res_blocks": 1, + "dropout": 0 + }, + "beta_schedule": { + "train": { + "schedule": "linear", + "n_timestep": 20, + "linear_start": 1e-6, + "linear_end": 1e-2 + } + }, + "diffusion": { + "time_size": 2048, + "channels": 1, + "conditional": true + } + }, + "train": { + "n_epoch": 100, + "val_freq": 100, + "save_checkpoint_freq": 100, + "print_freq": 10, + "optimizer": { + "type": "adam", + "lr": 3e-6 + }, + "ema_scheduler": { + "step_start_ema": 5000, + "update_ema_every": 1, + "ema_decay": 0.9999 + } + }, + "wandb": { + "project": "distributed_time" + } +} \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/logger.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/logger.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfefb82c3279ac7f0d56de3e6d90c0fdf2c56f14 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/logger.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/logger.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/logger.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a8816259b8e18c7623868a4476e80ed31657e93 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/logger.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/metrics.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/metrics.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5332c5098125c5857c0d4d2ee7398baea8f0cfa3 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/metrics.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/metrics.cpython-38.pyc 
b/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/metrics.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1cdad6bde4bcab4d4ddfbbbe7becf97cae141ac4 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/core/__pycache__/metrics.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/core/logger.py b/subject1-4/dynamicSplit/02DiffAD-main_low/core/logger.py new file mode 100644 index 0000000000000000000000000000000000000000..672af1886ba88de1caa889d7d90b649e34279b73 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/core/logger.py @@ -0,0 +1,143 @@ +import json +import logging +import os +from collections import OrderedDict +from datetime import datetime + + +def mkdirs(paths): + if isinstance(paths, str): + os.makedirs(paths, exist_ok=True) + else: + for path in paths: + os.makedirs(path, exist_ok=True) + + +def get_timestamp(): + return datetime.now().strftime('%y%m%d_%H%M%S') + + +def parse(args, model_epoch=None): + phase = args.phase + opt_path = args.config + gpu_ids = args.gpu_ids + enable_wandb = args.enable_wandb + # remove comments starting with '//' + json_str = '' + with open(opt_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.split('//')[0] + '\n' + json_str += line + + opt = json.loads(json_str, object_pairs_hook=OrderedDict) + + # set log directory + if args.debug: + opt['name'] = 'debug_{}'.format(opt['name']) + if opt['phase'] == 'train': + experiments_root = os.path.join( + 'experiments', '{}_{}_{}_{}'.format(opt['name'], opt['datasets']['train']['l_resolution'], + opt['datasets']['train']['r_resolution'], + opt['model']['beta_schedule']['train']['n_timestep'])) + elif opt['phase'] == 'test': + experiments_root = os.path.join( + 'experiments', '{}_{}_{}_{}_{}'.format(opt['name'], opt['datasets']['test']['l_resolution'], + opt['datasets']['test']['r_resolution'], + opt['model']['beta_schedule']['test']['n_timestep'], model_epoch)) + + 
opt['path']['experiments_root'] = experiments_root + for key, path in opt['path'].items(): + if 'resume' not in key and 'experiments' not in key: + opt['path'][key] = os.path.join(experiments_root, path) + mkdirs(opt['path'][key]) + + opt['phase'] = phase + + # export CUDA_VISIBLE_DEVICES + if gpu_ids is not None: + opt['gpu_ids'] = [int(id) for id in gpu_ids.split(',')] + gpu_list = gpu_ids + else: + gpu_list = ','.join(str(x) for x in opt['gpu_ids']) + os.environ['CUDA_VISIBLE_DEVICES'] = gpu_list + print('export CUDA_VISIBLE_DEVICES=' + gpu_list) + + if len(gpu_list) > 1: + opt['distributed'] = True + else: + opt['distributed'] = False + + # debug + if 'debug' in opt['name']: + opt['train']['print_freq'] = 2 + opt['train']['save_checkpoint_freq'] = 3 + opt['datasets']['train']['batch_size'] = 2 + opt['model']['beta_schedule']['train']['n_timestep'] = 10 + opt['datasets']['train']['data_len'] = 6 + + # W&B Logging + try: + log_wandb_ckpt = args.log_wandb_ckpt + opt['log_wandb_ckpt'] = log_wandb_ckpt + except: + pass + try: + log_eval = args.log_eval + opt['log_eval'] = log_eval + except: + pass + try: + log_infer = args.log_infer + opt['log_infer'] = log_infer + except: + pass + opt['enable_wandb'] = enable_wandb + + return opt + + +class NoneDict(dict): + def __missing__(self, key): + return None + + +# convert to NoneDict, which return None for missing key. 
+def dict_to_nonedict(opt): + if isinstance(opt, dict): + new_opt = dict() + for key, sub_opt in opt.items(): + new_opt[key] = dict_to_nonedict(sub_opt) + return NoneDict(**new_opt) + elif isinstance(opt, list): + return [dict_to_nonedict(sub_opt) for sub_opt in opt] + else: + return opt + + +def dict2str(opt, indent_l=1): + '''dict to string for logger''' + msg = '' + for k, v in opt.items(): + if isinstance(v, dict): + msg += ' ' * (indent_l * 2) + k + ':[\n' + msg += dict2str(v, indent_l + 1) + msg += ' ' * (indent_l * 2) + ']\n' + else: + msg += ' ' * (indent_l * 2) + k + ': ' + str(v) + '\n' + return msg + + +def setup_logger(logger_name, root, phase, level=logging.INFO, screen=False): + '''set up logger''' + l = logging.getLogger(logger_name) + formatter = logging.Formatter( + '%(asctime)s.%(msecs)03d - %(levelname)s: %(message)s', datefmt='%y-%m-%d %H:%M:%S') + log_file = os.path.join(root, '{}.log'.format(phase)) + fh = logging.FileHandler(log_file, mode='w') + fh.setFormatter(formatter) + l.setLevel(level) + l.addHandler(fh) + if screen: + sh = logging.StreamHandler() + sh.setFormatter(formatter) + l.addHandler(sh) diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/core/metrics.py b/subject1-4/dynamicSplit/02DiffAD-main_low/core/metrics.py new file mode 100644 index 0000000000000000000000000000000000000000..2691ae949afc35227a77dbe1b582e5916ec238bf --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/core/metrics.py @@ -0,0 +1,169 @@ +import numpy as np +import pandas as pd +from sklearn.metrics import f1_score, accuracy_score, recall_score, precision_score, mean_squared_error + + +def squeeze_tensor(tensor): + return tensor.squeeze().cpu() + + +def update_csv_col_name(all_datas): + df = all_datas.copy() + df.columns = [0, 1, 2, 3] + + return df + + +def tensor2allcsv(visuals, col_num): + df = pd.DataFrame() + sr_df = pd.DataFrame(squeeze_tensor(visuals['SR'])) + ori_df = pd.DataFrame(squeeze_tensor(visuals['ORI'])) + lr_df = 
pd.DataFrame(squeeze_tensor(visuals['LR'])) + inf_df = pd.DataFrame(squeeze_tensor(visuals['INF'])) + + if col_num != 1: + for i in range(col_num, sr_df.shape[1]): + sr_df.drop(labels=i, axis=1, inplace=True) + ori_df.drop(labels=i, axis=1, inplace=True) + lr_df.drop(labels=i, axis=1, inplace=True) + inf_df.drop(labels=i, axis=1, inplace=True) + + df['SR'] = sr_df.mean(axis=1) + df['ORI'] = ori_df.mean(axis=1) + df['LR'] = lr_df.mean(axis=1) + df['INF'] = inf_df.mean(axis=1) + + df['differ'] = (ori_df - sr_df).abs().mean(axis=1) + df['label'] = squeeze_tensor(visuals['label']) + + differ_df = (sr_df - ori_df) + + return df, sr_df, differ_df + + +def merge_all_csv(all_datas, all_data): + all_datas = pd.concat([all_datas, all_data]) + return all_datas + + +def save_csv(data, data_path): + data.to_csv(data_path, index=False) + + +def get_mean(df): + mean = df['value'].astype('float32').mean() + normal_mean = df['value'][df['label'] == 0].astype('float32').mean() + anomaly_mean = df['value'][df['label'] == 1].astype('float32').mean() + + return mean, normal_mean, anomaly_mean + + +def get_val_mean(df): + mean_dict = {} + + ori = 'ORI' + ori_mean = df[ori].astype('float32').mean() + ori_normal_mean = df[ori][df['label'] == 0].astype('float32').mean() + ori_anomaly_mean = df[ori][df['label'] == 1].astype('float32').mean() + + gen_mean = df['SR'].astype('float32').mean() + gen_normal_mean = df['SR'][df['label'] == 0].astype('float32').mean() + gen_anomaly_mean = df['SR'][df['label'] == 1].astype('float32').mean() + + mean_dict['MSE'] = mean_squared_error(df[ori], df['SR']) + + mean_dict['ori_mean'] = ori_mean + mean_dict['ori_normal_mean'] = ori_normal_mean + mean_dict['ori_anomaly_mean'] = ori_anomaly_mean + + mean_dict['gen_mean'] = gen_mean + mean_dict['gen_normal_mean'] = gen_normal_mean + mean_dict['gen_anomaly_mean'] = gen_anomaly_mean + + mean_dict['mean_differ'] = ori_mean - gen_mean + mean_dict['normal_mean_differ'] = ori_normal_mean - gen_normal_mean + 
mean_dict['anomaly_mean_differ'] = ori_anomaly_mean - gen_anomaly_mean + + mean_dict['ori_no-ano_differ'] = ori_normal_mean - ori_anomaly_mean + mean_dict['ori_mean-no_differ'] = ori_mean - ori_normal_mean + mean_dict['ori_mean-ano_differ'] = ori_mean - ori_anomaly_mean + + mean_dict['gen_no-ano_differ'] = gen_normal_mean - gen_anomaly_mean + mean_dict['gen_mean-no_differ'] = gen_mean - gen_normal_mean + mean_dict['gen_mean-ano_differ'] = gen_mean - gen_anomaly_mean + + return mean_dict + + +def relabeling_strategy(df, params): + y_true = [] + best_N = 0 + best_f1 = -1 + best_thred = 0 + best_predictions = [] + thresholds = np.arange(params['start_label'], params['end_label'], params['step_label']) + + df_sort = df.sort_values(by="differ", ascending=False) + df_sort = df_sort.reset_index(drop=False) + + for t in thresholds: + # if (t - 1) % params['step_t'] == 0: + # print("t: ", t) + y_true, y_pred, thred = predict_labels(df_sort, t) + for i in range(len(y_true)): + if y_pred[i] == 1 and y_true[i] == 1: + j = i - 1 + while j >= 0 and y_true[j] == 1 and y_pred[j] == 0: + y_pred[j] = 1 + j -= 1 + j = i + 1 + while j < len(y_pred) and y_true[j] == 1 and y_pred[j] == 0: + y_pred[j] = 1 + j += 1 + + f1 = calculate_f1(y_true, y_pred) + if f1 > best_f1: + best_f1 = f1 + best_N = t + best_thred = thred + best_predictions = y_pred + + accuracy = calculate_accuracy(y_true, best_predictions) + precision = calculate_precision(y_true, best_predictions) + recall = calculate_recall(y_true, best_predictions) + + return best_f1,accuracy,precision,recall + + +def predict_labels(df_sort, num): + df_sort['pred_label'] = 0 + df_sort.loc[0:num - 1, 'pred_label'] = 1 + thred = df_sort.loc[num - 1, 'differ'] + + df_sort = df_sort.set_index('index') + df_sort = df_sort.sort_index() + + y_true = df_sort['label'].tolist() + y_pred = df_sort['pred_label'].tolist() + + return y_true, y_pred, thred + + +def calculate_accuracy(y_true, y_pred): + accuracy = accuracy_score(y_true, y_pred) + 
return accuracy + + +def calculate_precision(y_true, y_pred): + precision = precision_score(y_true, y_pred) + return precision + + +def calculate_recall(y_true, y_pred): + recall = recall_score(y_true, y_pred) + return recall + + +def calculate_f1(y_true, y_pred): + f1 = f1_score(y_true, y_pred) + return f1 diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/data/LRHR_dataset.py b/subject1-4/dynamicSplit/02DiffAD-main_low/data/LRHR_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..b22f92136b774244541be2fbf50193d89e6e1a35 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/data/LRHR_dataset.py @@ -0,0 +1,43 @@ +from torch.utils.data import Dataset + +from data.prepare_time_data import PrepareTimeData + + +class LRHRDataset(Dataset): + def __init__(self, dataroot, datatype, phase, l_resolution=16, r_resolution=128, + split='train', data_len=-1, need_LR=False): + self.datatype = datatype + self.data_len = data_len + self.need_LR = need_LR + self.split = split + self.phase = phase + self.pre_data = PrepareTimeData(data_path=dataroot, phase=phase, base=l_resolution, size=r_resolution) + self.row_num = self.pre_data.get_row_num() + self.col_num = self.pre_data.get_col_num() + + if datatype == 'time': + self.hr_path, self.sr_path, self.labels, self.pre_labels = self.pre_data.get_sr_data() + self.dataset_len = len(self.sr_path) + if self.data_len <= 0: + self.data_len = self.dataset_len + else: + self.data_len = min(self.data_len, self.dataset_len) + else: + raise NotImplementedError( + 'data_type [{:s}] is not recognized.'.format(datatype)) + + def __len__(self): + return self.data_len + + def __getitem__(self, index): + + data_LR = None + data_ORI = self.hr_path[index] + data_HR = self.hr_path[index] + data_SR = self.sr_path[index] + data_label = self.labels[index] + + if self.phase == 'train': + return {'HR': data_HR, 'SR': data_SR, 'Index': index} + else: + return {'ORI': data_ORI, 'HR': data_HR, 'SR': data_SR, 'label': 
data_label, 'Index': index} diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/data/__init__.py b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9c915ff01b1a408c16e40fb1c5ce6eda3df0e9bc --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__init__.py @@ -0,0 +1,42 @@ +'''create dataset and dataloader''' +import logging + +import torch.utils.data + + +def create_dataloader(dataset, dataset_opt, phase): + '''create dataloader ''' + if phase == 'train': + return torch.utils.data.DataLoader( + dataset, + batch_size=dataset_opt['batch_size'], + shuffle=dataset_opt['use_shuffle'], + num_workers=dataset_opt['num_workers'], + pin_memory=True) + elif phase == 'val': + return torch.utils.data.DataLoader( + dataset, batch_size=1, shuffle=False, num_workers=1, pin_memory=True) + elif phase == 'test': + return torch.utils.data.DataLoader( + dataset, batch_size=1, shuffle=False, num_workers=1, pin_memory=True) + else: + raise NotImplementedError( + 'Dataloader [{:s}] is not found.'.format(phase)) + + +def create_dataset(dataset_opt, phase): + '''create dataset''' + mode = dataset_opt['mode'] + from data.LRHR_dataset import LRHRDataset as D + dataset = D(dataroot=dataset_opt['dataroot'], + datatype=dataset_opt['datatype'], + l_resolution=dataset_opt['l_resolution'], + r_resolution=dataset_opt['r_resolution'], + split=phase, + data_len=dataset_opt['data_len'], + need_LR=(mode == 'LR'), + phase=phase) + logger = logging.getLogger('base') + logger.info('Dataset [{:s} - {:s}] is created.'.format(dataset.__class__.__name__, + dataset_opt['name'])) + return dataset diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/LRHR_dataset.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/LRHR_dataset.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fba63b4d4fed74b4bb401570224523be5d07696d Binary files /dev/null and 
b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/LRHR_dataset.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/LRHR_dataset.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/LRHR_dataset.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e39c78c33598a1ab6c337e134f89dfec4d299be Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/LRHR_dataset.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/__init__.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..583fdb411b8bb294ea66cad9f625dff815b054ca Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/__init__.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/__init__.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..290fea552665c003812613a8cd173fc4557cbf5e Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/__init__.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/prepare_time_data.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/prepare_time_data.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d96b52ba38087ebbdf94a7d3f4e1cc3c8c0f7bd4 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/prepare_time_data.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/prepare_time_data.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/prepare_time_data.cpython-38.pyc new file mode 100644 
index 0000000000000000000000000000000000000000..512dafab3324b460ed69a511d6cb0b5e2e15e974 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/data/__pycache__/prepare_time_data.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/data/prepare_time_data.py b/subject1-4/dynamicSplit/02DiffAD-main_low/data/prepare_time_data.py new file mode 100644 index 0000000000000000000000000000000000000000..8252bb2701222d7b3746a09e413d721ef01fd67c --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/data/prepare_time_data.py @@ -0,0 +1,359 @@ +import math +import warnings + +import numpy as np +import pandas as pd +import torch + +warnings.filterwarnings("ignore") + + +class PrepareTimeData: + def __init__(self, data_path, phase, base, size): + self.data_path = data_path + self.phase = phase + self.base = base + self.size = size + + self.data_name = self.data_path.split('/')[-1].split('_')[0] + self.read_dataset(self.data_path, self.data_name) + self.df = self.ori_df.copy() + self.row_num = self.ori_df.shape[0] + self.col_num = self.ori_df.shape[1] + self.mean = self.df.mean(axis=1) + + self.df = self.get_mean_df(self.df) + self.df = self.vertical_merge_df(self.df) + self.df = self.join_together_labels(self.df) + self.df = self.fill_data(self.df) + self.df = self.standardize_data(self.df) + + def get_hr_data(self): + df = self.df.copy() + ori_values, values, labels, pre_labels = self.get_data_by_interval(df) + + return ori_values, values, labels, pre_labels + + def get_sr_data(self): + df = self.df.copy() + ori_values, values, labels, pre_labels = self.get_data_by_insert_normal() + + return ori_values, values, labels, pre_labels + + def get_mean_df(self, df): + df = df.copy() + for col in df.columns: + df[col] = self.mean + return df + + def vertical_merge_df(self, df): + df = df.copy() + two_power = 2 + + if self.col_num < 16: + two_power = 16 + df_temp = pd.DataFrame() + col_count = 0 + for i in range(two_power - self.col_num): 
+ if col_count >= self.col_num: + col_count = 0 + df_temp[i] = df.iloc[:, col_count] + col_count = col_count + 1 + else: + while self.col_num > two_power: + two_power = two_power * 2 + df_temp = df.iloc[:, 0:(two_power - self.col_num)] + + col_name = [] + for i in range(self.col_num): + col_name.append('value_' + str(i)) + + df.columns = col_name + col_name = [] + for i in range(self.col_num, two_power): + col_name.append('value_' + str(i)) + + df_temp.columns = col_name + df = pd.concat([df, df_temp], axis=1) + return df + + def join_together_labels(self, df): + df = df.copy() + + if self.phase == 'train': + df['label'] = 0 + else: + df['label'] = self.test_labels + return df + + def fill_data(self, df): + df = df.copy() + data_end = math.ceil(self.row_num / self.size) * self.size + + for i in range(self.row_num, data_end): + # df = df._append(pd.Series(), ignore_index=True) + df = df.append(pd.Series(), ignore_index=True) + # df = pd.concat([df, pd.Series()], ignore_index=True) + # df = pd.DataFrame(df).append(new_row, ignore_index=True) + # df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True) + + df.fillna(0, inplace=True) + return df + + def read_dataset(self, data_path, data_name): + if data_name.upper().find('MSL') != -1: + cols = [-1] + self.get_dataset(data_path, cols) + elif data_name.upper().find('PSM') != -1: + if self.phase == 'train': + cols = [-1] + self.get_dataset(data_path, cols) + if self.ori_df.columns.__contains__('timestamp_(min)'): + self.ori_df.drop(columns=['timestamp_(min)'], inplace=True) + else: + cols = [-1] + self.get_dataset(data_path, cols) + if self.ori_df.columns.__contains__('timestamp_(min)'): + self.ori_df.drop(columns=['timestamp_(min)'], inplace=True) + self.test_labels.drop(columns=['timestamp_(min)'], inplace=True) + elif data_name.upper().find('SMAP') != -1: + cols = [0, 1, 2, 3, 4, 7, 8, 9, 10, 12, 13, 15, 16, 19, 20] + self.get_dataset(data_path, cols) + elif data_name.upper().find('SMD') != -1: + cols = [0, 1, 
3, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 28, 33, 35, 36, 37] + self.get_dataset(data_path, cols) + + def get_dataset(self, data_path, cols): + if self.phase == 'train': + if -1 in cols: + self.ori_df = pd.read_csv(data_path) + else: + self.ori_df = pd.read_csv(data_path, usecols=cols) + else: + if -1 in cols: + self.ori_df = pd.read_csv(data_path) + else: + self.ori_df = pd.read_csv(data_path, usecols=cols) + + test_label_path = self.data_path.replace('_test.csv', '_test_label.csv') + self.test_labels = pd.read_csv(test_label_path) + + def get_data_by_insert_normal(self): + df = pd.DataFrame(columns=['value', 'label']) + df['value'] = self.df['value_0'] + df['label'] = self.df['label'] + + df_pre_label = self.mutation_point(df) + insert_datas = self.insert_normal(df_pre_label) + + ori_values = [] + values = [] + labels = [] + pre_labels = [] + + start_index = 0 + end_index = self.size + + for col in self.df.columns: + if col == 'label': + continue + self.df[col] = insert_datas['value'] + self.df['pre_label'] = insert_datas['pre_label'] + + ori_df = self.vertical_merge_df(self.ori_df) + ori_df = self.fill_data(ori_df) + + for i in range(0, self.df.shape[0], self.size): + insert_data = pd.DataFrame() + ori_value = pd.DataFrame() + + insert_data = pd.concat([insert_data, self.df[start_index: end_index]]) + ori_value = pd.concat([ori_value, ori_df[start_index: end_index]]) + start_index += self.size + end_index += self.size + + value = insert_data.copy().drop(['label', 'pre_label'], axis=1) + label = insert_data['label'] + pre_label = insert_data['pre_label'] + + value = torch.tensor(np.array(value).astype(np.float32)) + label = torch.tensor(np.array(label).astype(np.int64)) + pre_label = torch.tensor(np.array(pre_label).astype(np.int64)) + ori_value = torch.tensor(np.array(ori_value).astype(np.float32)) + + values.append(value.unsqueeze(0)) + labels.append(label) + pre_labels.append(pre_label) + ori_values.append(ori_value.unsqueeze(0)) + + return 
ori_values, values, labels, pre_labels + + def standardize_data(self, df): + df = df.copy() + name = self.data_path.split('.csv')[0] + print(name, "Points: {}".format(self.row_num)) + df = self.complete_value(df) + + if self.phase != 'train': + anomaly_len = len(df[df['label'] == 1].index.tolist()) + print("Labeled anomalies: {}".format(anomaly_len)) + + return df + + def complete_value(self, df): + + df.fillna(0, inplace=True) + return df + + def get_mutation_point(self, df_pre_label, start_index, end_index, last_size_var): + size_var = df_pre_label['value'][start_index: end_index].var() + label_count = len(df_pre_label[start_index: end_index][df_pre_label['label'] == 1].index.tolist()) + + if last_size_var == 0: + times = 'Nan' + else: + times = size_var / last_size_var + if times < 1 and times != 0: + times = 1 / times + + if times != "Nan" and times >= 10: + df_pre_label['pre_label'][start_index: end_index] = 1 + else: + df_pre_label['pre_label'][start_index: end_index] = 0 + + return size_var + + def mutation_point(self, df): + df_pre_label = df.copy() + df_pre_label['pre_label'] = 0 + + size = 128 + start_index = 0 + end_index = size + all_var = df_pre_label['value'].var() + + last_size_var = 0 + for i in range(int(self.row_num / size)): + last_size_var = self.get_mutation_point(df_pre_label, start_index, end_index, last_size_var) + + start_index += size + end_index += size + + self.get_mutation_point(df_pre_label, start_index, self.row_num - 1, last_size_var) + return df_pre_label + + def get_index(self, indexes): + count = 0 + start_indexes = [] + end_indexes = [] + + if len(indexes) != 0: + count = count + 1 + start_indexes.append(indexes[0]) + + for i in range(1, len(indexes)): + if indexes[i - 1] + 1 != indexes[i]: + count = count + 1 + end_indexes.append(indexes[i - 1]) + start_indexes.append(indexes[i]) + + end_indexes.append(indexes[len(indexes) - 1]) + + return start_indexes, end_indexes, count + + def insert_normal(self, data): + pre_labels = 
'pre_label' + + nor_indexes = data[0:self.row_num][data[pre_labels] == 0].index.tolist() + ano_indexes = data[0:self.row_num][data[pre_labels] == 1].index.tolist() + + nor_start_indexes, nor_end_indexes, nor_count = self.get_index(nor_indexes) + ano_start_indexes, ano_end_indexes, ano_count = self.get_index(ano_indexes) + + interval = int(self.size / self.base) + ano_len = 2 + + df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + + for i in range(nor_count): + + if nor_end_indexes[i] - nor_start_indexes[i] + 1 < interval: + temp_df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + + x = range(nor_start_indexes[i], nor_end_indexes[i] + 1) + xp = [nor_start_indexes[i], nor_end_indexes[i]] + fp = [data['value'][nor_start_indexes[i]], data['value'][nor_end_indexes[i]]] + z = np.interp(x, xp, fp) + + temp_df['ind'] = x + temp_df['value'] = z + temp_df['pre_label'] = 0 + df = pd.concat([df, temp_df]) + else: + last_start_x = -1 + start_xs = range(nor_start_indexes[i], nor_end_indexes[i] + 1, interval) + xp = [] + fp = [] + for start_x in start_xs: + if start_x + interval > nor_end_indexes[i]: + last_start_x = start_x + break + + xp.append(start_x) + xp.append(start_x + interval - 1) + + fp.append(data['value'][start_x]) + fp.append(data['value'][start_x + interval - 1]) + + x = range(nor_start_indexes[i], last_start_x) + z = np.interp(x, xp, fp) + + temp_df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + temp_df['ind'] = x + temp_df['value'] = z + temp_df['pre_label'] = 0 + df = pd.concat([df, temp_df]) + + if last_start_x != -1: + temp_df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + + x = range(last_start_x, nor_end_indexes[i] + 1) + xp = [last_start_x, nor_end_indexes[i]] + fp = [data['value'][last_start_x], data['value'][nor_end_indexes[i]]] + z = np.interp(x, xp, fp) + + temp_df['ind'] = x + temp_df['value'] = z + temp_df['pre_label'] = 0 + df = pd.concat([df, temp_df]) + + for i in range(ano_count): 
+ temp_df = pd.DataFrame(columns=['ind', 'value', 'label', 'pre_label']) + + x = range(ano_start_indexes[i] - 1, ano_end_indexes[i] + 2) + xp = [ano_start_indexes[i] - 1, ano_end_indexes[i] + 1] + fp = [data['value'][ano_start_indexes[i] - 1], data['value'][ano_end_indexes[i] + 1]] + z = np.interp(x, xp, fp) + + for j in range(len(x)): + if j == 0 or j == len(x) - 1: + continue + + temp_df.loc[x[j], 'ind'] = x[j] + temp_df.loc[x[j], 'value'] = z[j] + temp_df.loc[x[j], 'pre_label'] = 1 + + df = pd.concat([df, temp_df]) + + df = df.set_index(['ind'], inplace=False).sort_index() + df['label'] = data['label'] + for i in range(self.row_num, data.shape[0]): + df = df.append(pd.Series(), ignore_index=True) + df.fillna(0, inplace=True) + return df + + def get_row_num(self): + return self.row_num + + def get_col_num(self): + return self.col_num diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/__init__.py b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..0eb77d1c825dcdeceddbc99eddf974a062750626 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__init__.py @@ -0,0 +1,10 @@ +import logging + +logger = logging.getLogger('base') + + +def create_model(opt): + from .model import DDPM as M + m = M(opt) + logger.info('Model [{:s}] is created.'.format(m.__class__.__name__)) + return m diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/__init__.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/__init__.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54bd8d91fd9f3fafe38dfc71311f3954ade614ba Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/__init__.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/__init__.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/__init__.cpython-38.pyc new 
file mode 100644 index 0000000000000000000000000000000000000000..ab45257dcdcb10a5de99202a7f9699e0e3952e25 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/__init__.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/base_model.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/base_model.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ed0058a949bee1f9ad58ff27a2073f075c05450 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/base_model.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/base_model.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/base_model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3da8f605c28b8459254af43ffe831b1b9783811d Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/base_model.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/model.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/model.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..60f0c607a64793439df223f4e821eedd86c34e14 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/model.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/model.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/model.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d5b417c9cb376fba19d0425af84e967504b2847c Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/model.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/networks.cpython-37.pyc 
b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/networks.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7a44d4d155525f29c7fd852db469999dd4d66b8b Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/networks.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/networks.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/networks.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5b7f298038b9d6c90d587852ba00957106b20b2 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/__pycache__/networks.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/base_model.py b/subject1-4/dynamicSplit/02DiffAD-main_low/model/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..de1022eb58ad1e4b8f79553282403e843aa04609 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/model/base_model.py @@ -0,0 +1,47 @@ +import torch +import torch.nn as nn + + +class BaseModel(): + def __init__(self, opt): + self.opt = opt + self.device = torch.device( + 'cuda' if opt['gpu_ids'] is not None else 'cpu') + self.begin_step = 0 + self.begin_epoch = 0 + + def feed_data(self, data): + pass + + def optimize_parameters(self): + pass + + def get_current_visuals(self): + pass + + def get_current_losses(self): + pass + + def print_network(self): + pass + + def set_device(self, x): + if isinstance(x, dict): + for key, item in x.items(): + if item is not None: + x[key] = item.to(self.device) + elif isinstance(x, list): + for item in x: + if item is not None: + item = item.to(self.device) + else: + x = x.to(self.device) + return x + + def get_network_description(self, network): + '''Get the string and total parameters of the network''' + if isinstance(network, nn.DataParallel): + network = network.module + s = str(network) + n = 
sum(map(lambda x: x.numel(), network.parameters())) + return s, n diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/model.py b/subject1-4/dynamicSplit/02DiffAD-main_low/model/model.py new file mode 100644 index 0000000000000000000000000000000000000000..c9c8eaea57f3d0fb75efc8cf275da25742068228 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/model/model.py @@ -0,0 +1,175 @@ +import logging +import os +from collections import OrderedDict + +import torch +import torch.nn as nn + +import model.networks as networks +from .base_model import BaseModel + +logger = logging.getLogger('base') + + +class DDPM(BaseModel): + def __init__(self, opt): + super(DDPM, self).__init__(opt) + netG = networks.define_G(opt) + + self.netG = self.set_device(netG) + self.schedule_phase = None + + # set loss and load resume state + self.set_loss() + self.set_new_noise_schedule( + opt['model']['beta_schedule']['train'], schedule_phase='train') + + if self.opt['phase'] == 'train': + self.netG.train() + if opt['model']['finetune_norm']: + optim_params = [] + for k, v in self.netG.named_parameters(): + v.requires_grad = False + if k.find('transformer') >= 0: + v.requires_grad = True + v.data.zero_() + optim_params.append(v) + logger.info( + 'Params [{:s}] initialized to 0 and will optimize.'.format(k)) + else: + optim_params = list(self.netG.parameters()) + + self.optG = torch.optim.Adam( + optim_params, lr=opt['train']["optimizer"]["lr"]) + self.log_dict = OrderedDict() + self.load_network() + self.print_network() + + def feed_data(self, data): + self.data = self.set_device(data) + + def optimize_parameters(self): + self.optG.zero_grad() + l_pix = self.netG(self.data) + # need to average in multi-gpu + b, c, h, w = self.data['HR'].shape + l_pix = l_pix.sum() / int(b * c * h * w) + + l_pix.backward() + self.optG.step() + + # set log + self.log_dict['l_pix'] = l_pix.item() + + def test(self,client_socket, continous=False): + self.netG.eval() + + with torch.no_grad(): + ori 
= self.data['ORI'].squeeze() + min_num = ori.min().item() + max_num = ori.max().item() + if isinstance(self.netG, nn.DataParallel): + self.SR = self.netG.module.super_resolution( + self.data['SR'],client_socket, continous=continous, min_num=min_num, max_num=max_num) + else: + self.SR = self.netG.super_resolution( + self.data['SR'],client_socket, continous=continous, min_num=min_num, max_num=max_num) + self.netG.train() + + def sample(self, batch_size=1, continous=False): + self.netG.eval() + with torch.no_grad(): + if isinstance(self.netG, nn.DataParallel): + self.SR = self.netG.module.sample(batch_size, continous) + else: + self.SR = self.netG.sample(batch_size, continous) + self.netG.train() + + def set_loss(self): + if isinstance(self.netG, nn.DataParallel): + self.netG.module.set_loss(self.device) + else: + self.netG.set_loss(self.device) + + def set_new_noise_schedule(self, schedule_opt, schedule_phase='train'): + if self.schedule_phase is None or self.schedule_phase != schedule_phase: + self.schedule_phase = schedule_phase + if isinstance(self.netG, nn.DataParallel): + self.netG.module.set_new_noise_schedule( + schedule_opt, self.device) + else: + self.netG.set_new_noise_schedule(schedule_opt, self.device) + + def get_current_log(self): + return self.log_dict + + def get_current_visuals(self, need_LR=True, sample=False): + out_dict = OrderedDict() + if sample: + out_dict['SAM'] = self.SR.detach().float().cpu() + else: + out_dict['SR'] = self.SR.detach().float().cpu() + out_dict['INF'] = self.data['SR'].detach().float().cpu() + out_dict['ORI'] = self.data['ORI'].detach().float().cpu() + out_dict['HR'] = self.data['HR'].detach().float().cpu() + out_dict['label'] = self.data['label'].detach().cpu() + if need_LR and 'LR' in self.data: + out_dict['LR'] = self.data['LR'].detach().float().cpu() + else: + out_dict['LR'] = out_dict['INF'] + return out_dict + + def print_network(self): + s, n = self.get_network_description(self.netG) + if isinstance(self.netG, 
nn.DataParallel): + net_struc_str = '{} - {}'.format(self.netG.__class__.__name__, + self.netG.module.__class__.__name__) + else: + net_struc_str = '{}'.format(self.netG.__class__.__name__) + + logger.info( + 'Network G structure: {}, with parameters: {:,d}'.format(net_struc_str, n)) + logger.info(s) + + def save_network(self, epoch, iter_step): + gen_path = os.path.join( + self.opt['path']['checkpoint'], 'E{}_gen.pth'.format(epoch)) + opt_path = os.path.join( + self.opt['path']['checkpoint'], 'E{}_opt.pth'.format(epoch)) + # gen + network = self.netG + if isinstance(self.netG, nn.DataParallel): + network = network.module + state_dict = network.state_dict() + for key, param in state_dict.items(): + state_dict[key] = param.cpu() + torch.save(state_dict, gen_path) + # opt + opt_state = {'epoch': epoch, 'iter': iter_step, + 'scheduler': None, 'optimizer': None} + opt_state['optimizer'] = self.optG.state_dict() + torch.save(opt_state, opt_path) + + logger.info( + 'Saved model in [{:s}] ...'.format(gen_path)) + + def load_network(self): + load_path = self.opt['path']['resume_state'] + if load_path is not None: + logger.info( + 'Loading pretrained model for G [{:s}] ...'.format(load_path)) + gen_path = '{}_gen.pth'.format(load_path) + opt_path = '{}_opt.pth'.format(load_path) + # gen + network = self.netG + if isinstance(self.netG, nn.DataParallel): + network = network.module + network.load_state_dict(torch.load( + gen_path), strict=(not self.opt['model']['finetune_norm'])) + + if self.opt['phase'] == 'train': + # optimizer + opt = torch.load(opt_path) + self.optG.load_state_dict(opt['optimizer']) + self.begin_step = opt['iter'] + self.begin_epoch = opt['epoch'] diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/networks.py b/subject1-4/dynamicSplit/02DiffAD-main_low/model/networks.py new file mode 100644 index 0000000000000000000000000000000000000000..d2769b5477610f43aca87cf485e59c9c1c5e5313 --- /dev/null +++ 
b/subject1-4/dynamicSplit/02DiffAD-main_low/model/networks.py @@ -0,0 +1,112 @@ +import functools +import logging + +import torch +import torch.nn as nn +from torch.nn import init + +logger = logging.getLogger('base') + + +# initialize +def weights_init_normal(m, std=0.02): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + init.normal_(m.weight.data, 0.0, std) + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('Linear') != -1: + init.normal_(m.weight.data, 0.0, std) + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('BatchNorm2d') != -1: + init.normal_(m.weight.data, 1.0, std) # BN also uses norm + init.constant_(m.bias.data, 0.0) + + +def weights_init_kaiming(m, scale=1): + classname = m.__class__.__name__ + if classname.find('Conv2d') != -1: + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('Linear') != -1: + init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') + m.weight.data *= scale + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('BatchNorm2d') != -1: + init.constant_(m.weight.data, 1.0) + init.constant_(m.bias.data, 0.0) + + +def weights_init_orthogonal(m): + classname = m.__class__.__name__ + if classname.find('Conv') != -1: + init.orthogonal_(m.weight.data, gain=1) + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('Linear') != -1: + init.orthogonal_(m.weight.data, gain=1) + if m.bias is not None: + m.bias.data.zero_() + elif classname.find('BatchNorm2d') != -1: + init.constant_(m.weight.data, 1.0) + init.constant_(m.bias.data, 0.0) + + +def init_weights(net, init_type='kaiming', scale=1, std=0.02): + # scale for 'kaiming', std for 'normal'. 
+ logger.info('Initialization method [{:s}]'.format(init_type)) + if init_type == 'normal': + weights_init_normal_ = functools.partial(weights_init_normal, std=std) + net.apply(weights_init_normal_) + elif init_type == 'kaiming': + weights_init_kaiming_ = functools.partial( + weights_init_kaiming, scale=scale) + net.apply(weights_init_kaiming_) + elif init_type == 'orthogonal': + net.apply(weights_init_orthogonal) + else: + raise NotImplementedError( + 'initialization method [{:s}] not implemented'.format(init_type)) + + +# define network +def define_G(opt): + model_opt = opt['model'] + if model_opt['which_model_G'] == 'sr3': + from .sr3_modules import diffusion, unet + + if ('norm_groups' not in model_opt['unet']) or model_opt['unet']['norm_groups'] is None: + model_opt['unet']['norm_groups'] = 32 + + model = unet.UNet( + in_channel=model_opt['unet']['in_channel'], + out_channel=model_opt['unet']['out_channel'], + norm_groups=model_opt['unet']['norm_groups'], + inner_channel=model_opt['unet']['inner_channel'], + channel_mults=model_opt['unet']['channel_multiplier'], + attn_res=model_opt['unet']['attn_res'], + res_blocks=model_opt['unet']['res_blocks'], + dropout=model_opt['unet']['dropout'], + time_size=model_opt['diffusion']['time_size'] + ) + + netG = diffusion.GaussianDiffusion( + model, + time_size=model_opt['diffusion']['time_size'], + channels=model_opt['diffusion']['channels'], + loss_type='l1', # L1 or L2 + conditional=model_opt['diffusion']['conditional'], + schedule_opt=model_opt['beta_schedule']['train'] + ) + if opt['phase'] == 'train': + init_weights(netG, init_type='orthogonal') + + if opt['gpu_ids'] and opt['distributed']: + assert torch.cuda.is_available() + netG = nn.DataParallel(netG) + + return netG diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/diffusion.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/diffusion.cpython-37.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..4424c1bb5fcb44b0485367b87b7362c398559b36 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/diffusion.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/diffusion.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/diffusion.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..be4f35138c6051016821f8bea0216749cb0393f2 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/diffusion.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/unet.cpython-37.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/unet.cpython-37.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2ec7a53f62dbe159a3ed1c9d5ed50230e31343e5 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/unet.cpython-37.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/unet.cpython-38.pyc b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/unet.cpython-38.pyc new file mode 100644 index 0000000000000000000000000000000000000000..914d38468f8770184e50ff0233c2c5f5851c6297 Binary files /dev/null and b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/__pycache__/unet.cpython-38.pyc differ diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/diffusion.py b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/diffusion.py new file mode 100644 index 0000000000000000000000000000000000000000..f10c1098b8adaa95c3efed75be63cb49972c1395 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/diffusion.py @@ -0,0 +1,273 @@ +import math +from functools import partial +from inspect import isfunction 
+ +import numpy as np +import torch +from torch import nn +from tqdm import tqdm + +import io +def _warmup_beta(linear_start, linear_end, n_timestep, warmup_frac): + betas = linear_end * np.ones(n_timestep, dtype=np.float64) + warmup_time = int(n_timestep * warmup_frac) + betas[:warmup_time] = np.linspace( + linear_start, linear_end, warmup_time, dtype=np.float64) + return betas + + +def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_end=2e-2, cosine_s=8e-3): + if schedule == 'quad': + betas = np.linspace(linear_start ** 0.5, linear_end ** 0.5, + n_timestep, dtype=np.float64) ** 2 + elif schedule == 'linear': + betas = np.linspace(linear_start, linear_end, + n_timestep, dtype=np.float64) + elif schedule == 'warmup10': + betas = _warmup_beta(linear_start, linear_end, + n_timestep, 0.1) + elif schedule == 'warmup50': + betas = _warmup_beta(linear_start, linear_end, + n_timestep, 0.5) + elif schedule == 'const': + betas = linear_end * np.ones(n_timestep, dtype=np.float64) + elif schedule == 'jsd': # 1/T, 1/(T-1), 1/(T-2), ..., 1 + betas = 1. 
/ np.linspace(n_timestep, + 1, n_timestep, dtype=np.float64) + elif schedule == "cosine": + timesteps = ( + torch.arange(n_timestep + 1, dtype=torch.float64) / + n_timestep + cosine_s + ) + alphas = timesteps / (1 + cosine_s) * math.pi / 2 + alphas = torch.cos(alphas).pow(2) + alphas = alphas / alphas[0] + betas = 1 - alphas[1:] / alphas[:-1] + betas = betas.clamp(max=0.999) + else: + raise NotImplementedError(schedule) + return betas + + +# gaussian diffusion trainer class +def exists(x): + return x is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +class GaussianDiffusion(nn.Module): + def __init__(self, denoise_fn, time_size, channels=3, loss_type='l1', + conditional=True, schedule_opt=None): + + super().__init__() + self.channels = channels + self.time_size = time_size + self.denoise_fn = denoise_fn + self.loss_type = loss_type + self.conditional = conditional + if schedule_opt is not None: + pass + + def set_loss(self, device): + if self.loss_type == 'l1': + self.loss_func = nn.L1Loss(reduction='sum').to(device) + elif self.loss_type == 'l2': + self.loss_func = nn.MSELoss(reduction='sum').to(device) + else: + raise NotImplementedError() + + def set_new_noise_schedule(self, schedule_opt, device): + to_torch = partial(torch.tensor, dtype=torch.float32, device=device) + + betas = make_beta_schedule( + schedule=schedule_opt['schedule'], + n_timestep=schedule_opt['n_timestep'], + linear_start=schedule_opt['linear_start'], + linear_end=schedule_opt['linear_end']) + + betas = betas.detach().cpu().numpy() if isinstance( + betas, torch.Tensor) else betas + + alphas = 1. 
- betas + alphas_cumprod = np.cumprod(alphas, axis=0) + alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1]) + self.sqrt_alphas_cumprod_prev = np.sqrt( + np.append(1., alphas_cumprod)) + + timesteps, = betas.shape + self.num_timesteps = int(timesteps) + + self.register_buffer('betas', to_torch(betas)) + self.register_buffer('alphas_cumprod', to_torch(alphas_cumprod)) + self.register_buffer('alphas_cumprod_prev', + to_torch(alphas_cumprod_prev)) + + # calculations for diffusion q(x_t | x_{t-1}) and others + self.register_buffer('sqrt_alphas_cumprod', + to_torch(np.sqrt(alphas_cumprod))) + self.register_buffer('sqrt_one_minus_alphas_cumprod', + to_torch(np.sqrt(1. - alphas_cumprod))) + self.register_buffer('log_one_minus_alphas_cumprod', + to_torch(np.log(1. - alphas_cumprod))) + self.register_buffer('sqrt_recip_alphas_cumprod', + to_torch(np.sqrt(1. / alphas_cumprod))) + self.register_buffer('sqrt_recipm1_alphas_cumprod', + to_torch(np.sqrt(1. / alphas_cumprod - 1))) + + # calculations for posterior q(x_{t-1} | x_t, x_0) + posterior_variance = betas * \ + (1. - alphas_cumprod_prev) / (1. - alphas_cumprod) + # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t) + self.register_buffer('posterior_variance', + to_torch(posterior_variance)) + # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain + self.register_buffer('posterior_log_variance_clipped', to_torch( + np.log(np.maximum(posterior_variance, 1e-20)))) + self.register_buffer('posterior_mean_coef1', to_torch( + betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod))) + self.register_buffer('posterior_mean_coef2', to_torch( + (1. - alphas_cumprod_prev) * np.sqrt(alphas) / (1. 
- alphas_cumprod))) + + def predict_start_from_noise(self, x_t, t, noise): + return self.sqrt_recip_alphas_cumprod[t] * x_t - \ + self.sqrt_recipm1_alphas_cumprod[t] * noise + + def q_posterior(self, x_start, x_t, t): + posterior_mean = self.posterior_mean_coef1[t] * \ + x_start + self.posterior_mean_coef2[t] * x_t + posterior_log_variance_clipped = self.posterior_log_variance_clipped[t] + return posterior_mean, posterior_log_variance_clipped + + def p_mean_variance(self, x, t, clip_denoised: bool, condition_x=None): + batch_size = x.shape[0] + noise_level = torch.FloatTensor( + [self.sqrt_alphas_cumprod_prev[t + 1]]).repeat(batch_size, 1).to(x.device) + if condition_x is not None: + x_temp = torch.cat([condition_x, x], dim=1) + noise = self.denoise_fn(x_temp, noise_level) + x_recon = self.predict_start_from_noise(x, t=t, noise=noise) + else: + x_recon = self.predict_start_from_noise( + x, t=t, noise=self.denoise_fn(x, noise_level)) + + if clip_denoised: + x_recon.clamp_(self.min_num, self.max_num) + + model_mean, posterior_log_variance = self.q_posterior( + x_start=x_recon, x_t=x, t=t) + return model_mean, posterior_log_variance + + @torch.no_grad() + def p_sample(self, x, t, clip_denoised=True, condition_x=None): + model_mean, model_log_variance = self.p_mean_variance( + x=x, t=t, clip_denoised=clip_denoised, condition_x=condition_x) + + noise = torch.randn_like(x) if t > 0 else torch.zeros_like(x) + return model_mean + noise * (0.5 * model_log_variance).exp() + + @torch.no_grad() + def p_sample_loop(self, x_in,client_socket, continous=False): + device = self.betas.device + q = 0 + + sample_inter = (1 | (self.num_timesteps // 10)) + if not self.conditional: + shape = x_in + # print(shape)#kk + img = torch.randn(shape, device=device) + ret_img = img + for i in tqdm(reversed(range(0, self.num_timesteps)), desc='sampling loop time step', + total=self.num_timesteps): + img = self.p_sample(img, i, clip_denoised=True) + if i % sample_inter == 0: + ret_img = 
torch.cat([ret_img, img], dim=0) + else: + x = x_in + shape = x.shape + # print(shape)#kk [1,1,2048,32] + img = torch.randn(shape, device=device) + ret_img = x + + for i in tqdm(reversed(range(0, self.num_timesteps)), desc='sampling loop time step', + total=self.num_timesteps): + img = self.p_sample(img, i, condition_x=x, clip_denoised=True) + if i % sample_inter == 0: + ret_img = torch.cat([ret_img, img], dim=0) + bytes_to_send = i.to_bytes(4, byteorder='big') + self.send_tensor(img, client_socket) + self.send_tensor(x, client_socket) + # print(ret_img.shape)#kk [11,1,2048,32] + # print(ret_img[-1].shape)#kk [1,2048,32] + if continous: + return ret_img + else: + return ret_img[-1] + + def send_tensor(self,tensor, socket): + buffer = io.BytesIO() + torch.save(tensor, buffer) + buffer.seek(0) + data = buffer.read() + # 首先发送数据长度 + socket.sendall(len(data).to_bytes(4, byteorder='big')) + # 然后发送数据 + socket.sendall(data) + + + @torch.no_grad() + def sample(self, batch_size=1, continous=False): + time_size = self.time_size + channels = self.channels + return self.p_sample_loop((batch_size, channels, time_size, time_size), continous) + + @torch.no_grad() + def super_resolution(self, x_in,client_socket, min_num, max_num, continous=False): + self.min_num = min_num + self.max_num = max_num + return self.p_sample_loop(x_in,client_socket, continous) + + def q_sample(self, x_start, continuous_sqrt_alpha_cumprod, noise=None): + noise = default(noise, lambda: torch.randn_like(x_start)) + + # random gama + return ( + continuous_sqrt_alpha_cumprod * x_start + + (1 - continuous_sqrt_alpha_cumprod ** 2).sqrt() * noise + ) + + def p_losses(self, x_in, noise=None): + x_start = x_in['HR'] + [b, c, h, w] = x_start.shape + t = np.random.randint(1, self.num_timesteps + 1) + + continuous_sqrt_alpha_cumprod = torch.FloatTensor( + np.random.uniform( + self.sqrt_alphas_cumprod_prev[t - 1], + self.sqrt_alphas_cumprod_prev[t], + size=b + ) + ).to(x_start.device) + + continuous_sqrt_alpha_cumprod 
= continuous_sqrt_alpha_cumprod.view(b, -1) + + noise = default(noise, lambda: torch.randn_like(x_start)) + + x_noisy = self.q_sample( + x_start=x_start, continuous_sqrt_alpha_cumprod=continuous_sqrt_alpha_cumprod.view(-1, 1, 1, 1), noise=noise) + + if not self.conditional: + x_recon = self.denoise_fn(x_noisy, continuous_sqrt_alpha_cumprod) + else: + x_cat = torch.cat([x_in['SR'], x_noisy], dim=1) + x_recon = self.denoise_fn(x_cat, continuous_sqrt_alpha_cumprod) + + loss = self.loss_func(noise, x_recon) + return loss + + def forward(self, x, *args, **kwargs): + return self.p_losses(x, *args, **kwargs) diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/unet.py b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/unet.py new file mode 100644 index 0000000000000000000000000000000000000000..77159a7aa693782fe91e10ce6d3083fbc5947bf0 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/model/sr3_modules/unet.py @@ -0,0 +1,274 @@ +import math +from inspect import isfunction + +import torch +from torch import nn + + +def exists(x): + return x is not None + + +def default(val, d): + if exists(val): + return val + return d() if isfunction(d) else d + + +class PositionalEncoding(nn.Module): + def __init__(self, dim): + super().__init__() + self.dim = dim + + def forward(self, noise_level): + count = self.dim // 2 + step = torch.arange(count, dtype=noise_level.dtype, + device=noise_level.device) / count + encoding = noise_level.unsqueeze( + 1) * torch.exp(-math.log(1e4) * step.unsqueeze(0)) + encoding = torch.cat( + [torch.sin(encoding), torch.cos(encoding)], dim=-1) + return encoding + + +class FeatureWiseAffine(nn.Module): + def __init__(self, in_channels, out_channels, use_affine_level=False): + super(FeatureWiseAffine, self).__init__() + self.use_affine_level = use_affine_level + self.noise_func = nn.Sequential( + nn.Linear(in_channels, out_channels * (1 + self.use_affine_level)) + ) + + def forward(self, x, noise_embed): + batch = 
x.shape[0] + if self.use_affine_level: + gamma, beta = self.noise_func(noise_embed).view( + batch, -1, 1, 1).chunk(3, dim=1) + x = (1 + gamma) * x + beta + else: + x = x + self.noise_func(noise_embed).view(batch, -1, 1, 1) + return x + + +class Swish(nn.Module): + def forward(self, x): + return x * torch.sigmoid(x) + + +class Upsample(nn.Module): + def __init__(self, dim): + super().__init__() + self.up = nn.Upsample(scale_factor=2, mode="nearest") + self.conv = nn.Conv2d(dim, dim, 3, padding=1) + + def forward(self, x): + return self.conv(self.up(x)) + + +class Downsample(nn.Module): + def __init__(self, dim): + super().__init__() + self.conv = nn.Conv2d(dim, dim, 3, 2, 1) + + def forward(self, x): + return self.conv(x) + + +# building block modules +class Block(nn.Module): + def __init__(self, dim, dim_out, groups=32, dropout=0): + super().__init__() + self.block = nn.Sequential( + nn.GroupNorm(groups, dim), + Swish(), + nn.Dropout(dropout) if dropout != 0 else nn.Identity(), + nn.Conv2d(dim, dim_out, 3, padding=1) + ) + + def forward(self, x): + return self.block(x) + + +class ResnetBlock(nn.Module): + def __init__(self, dim, dim_out, noise_level_emb_dim=None, dropout=0, use_affine_level=False, norm_groups=32): + super().__init__() + self.noise_func = FeatureWiseAffine( + noise_level_emb_dim, dim_out, use_affine_level) + + self.block1 = Block(dim, dim_out, groups=norm_groups) + self.block2 = Block(dim_out, dim_out, groups=norm_groups, dropout=dropout) + self.res_conv = nn.Conv2d( + dim, dim_out, 1) if dim != dim_out else nn.Identity() + + def forward(self, x, time_emb): + b, c, h, w = x.shape + h = self.block1(x) + h = self.noise_func(h, time_emb) + h = self.block2(h) + return h + self.res_conv(x) + + +class SelfAttention(nn.Module): + def __init__(self, in_channel, n_head=1, norm_groups=32): + super().__init__() + + self.n_head = n_head + + self.norm = nn.GroupNorm(norm_groups, in_channel) + self.qkv = nn.Conv2d(in_channel, in_channel * 3, 1, bias=False) + 
self.out = nn.Conv2d(in_channel, in_channel, 1) + + def forward(self, input): + batch, channel, height, width = input.shape + n_head = self.n_head + head_dim = channel // n_head + + norm = self.norm(input) + qkv = self.qkv(norm).view(batch, n_head, head_dim * 3, height, width) + query, key, value = qkv.chunk(3, dim=2) # bhdyx + + attn = torch.einsum( + "bnchw, bncyx -> bnhwyx", query, key + ).contiguous() / math.sqrt(channel) + attn = attn.view(batch, n_head, height, width, -1) + attn = torch.softmax(attn, -1) + attn = attn.view(batch, n_head, height, width, height, width) + + out = torch.einsum("bnhwyx, bncyx -> bnchw", attn, value).contiguous() + out = self.out(out.view(batch, channel, height, width)) + + return out + input + + +class ResnetBlocWithAttn(nn.Module): + def __init__(self, dim, dim_out, *, noise_level_emb_dim=None, norm_groups=32, dropout=0, with_attn=False): + super().__init__() + self.with_attn = with_attn + self.res_block = ResnetBlock( + dim, dim_out, noise_level_emb_dim, norm_groups=norm_groups, dropout=dropout) + if with_attn: + self.attn = SelfAttention(dim_out, norm_groups=norm_groups) + + def forward(self, x, time_emb): + x = self.res_block(x, time_emb) + if (self.with_attn): + x = self.attn(x) + return x + + +class UNet(nn.Module): + def __init__( + self, + in_channel=6, + out_channel=3, + inner_channel=32, + norm_groups=32, + channel_mults=(1, 2, 4, 8, 8), + attn_res=(8), + res_blocks=3, + dropout=0, + with_noise_level_emb=True, + time_size=128 + ): + super().__init__() + + if with_noise_level_emb: + noise_level_channel = inner_channel + self.noise_level_mlp = nn.Sequential( + PositionalEncoding(inner_channel), + nn.Linear(inner_channel, inner_channel * 4), + Swish(), + nn.Linear(inner_channel * 4, inner_channel) + ) + else: + noise_level_channel = None + self.noise_level_mlp = None + + num_mults = len(channel_mults) + pre_channel = inner_channel + feat_channels = [pre_channel] + now_res = time_size + downs = [nn.Conv2d(in_channel, 
inner_channel, + kernel_size=3, padding=1)] + + for ind in range(num_mults): + is_last = (ind == num_mults - 1) + use_attn = (now_res in attn_res) + channel_mult = inner_channel * channel_mults[ind] + + for _ in range(0, res_blocks): + downs.append(ResnetBlocWithAttn( + pre_channel, channel_mult, noise_level_emb_dim=noise_level_channel, norm_groups=norm_groups, + dropout=dropout, with_attn=use_attn)) + feat_channels.append(channel_mult) + pre_channel = channel_mult + if not is_last: + downs.append(Downsample(pre_channel)) + feat_channels.append(pre_channel) + now_res = now_res // 2 + self.downs = nn.ModuleList(downs) + + self.mid = nn.ModuleList([ + ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel, + norm_groups=norm_groups, + dropout=dropout, with_attn=True), + ResnetBlocWithAttn(pre_channel, pre_channel, noise_level_emb_dim=noise_level_channel, + norm_groups=norm_groups, + dropout=dropout, with_attn=False) + ]) + + ups = [] + for ind in reversed(range(num_mults)): + is_last = (ind < 1) + use_attn = (now_res in attn_res) + channel_mult = inner_channel * channel_mults[ind] + + for _ in range(0, res_blocks + 1): + ups.append(ResnetBlocWithAttn( + pre_channel + feat_channels.pop(), channel_mult, noise_level_emb_dim=noise_level_channel, + norm_groups=norm_groups, + dropout=dropout, with_attn=use_attn)) + pre_channel = channel_mult + if not is_last: + ups.append(Upsample(pre_channel)) + now_res = now_res * 2 + + self.ups = nn.ModuleList(ups) + self.final_conv = Block(pre_channel, default(out_channel, in_channel), groups=norm_groups) + + def forward(self, x, time): + t = self.noise_level_mlp(time) if exists( + self.noise_level_mlp) else None + + feats = [] + for layer in self.downs: + if isinstance(layer, ResnetBlocWithAttn): + x = layer(x, t) + else: + x = layer(x) + feats.append(x) + + for layer in self.mid: + if isinstance(layer, ResnetBlocWithAttn): + x = layer(x, t) + else: + x = layer(x) + + for layer in self.ups: + if 
isinstance(layer, ResnetBlocWithAttn): + pop_temp = feats.pop() + x_temp = torch.cat((x, pop_temp), dim=1) + x = layer(x_temp, t) + else: + x = layer(x) + + return self.final_conv(x) +#模型组件 +#PositionalEncoding: 位置编码器,用于编码噪声级别。 +#FeatureWiseAffine: 用于特征级别的仿射变换。 +#Swish: Swish 激活函数。 +#Upsample: 上采样模块。 +#Downsample: 下采样模块。 +#Block: 基本的卷积块。 +#ResnetBlock: ResNet 风格的残差块。 +#SelfAttention: 自注意力机制模块,用于提取图像特征中的重要信息。 \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/requirements.txt b/subject1-4/dynamicSplit/02DiffAD-main_low/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..918c9c0a31edb7a74cdce8cced4814813dc67ff5 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/requirements.txt @@ -0,0 +1,7 @@ +torch>=1.12 +torchvision +numpy~=1.23.2 +pandas~=1.5.1 +scikit-learn~=1.1.2 +tqdm~=4.64.1 +tensorboardx~=2.5.1 \ No newline at end of file diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/server2.py b/subject1-4/dynamicSplit/02DiffAD-main_low/server2.py new file mode 100644 index 0000000000000000000000000000000000000000..cf89ab24e12848cfc200d5838a7720e1b4686519 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/server2.py @@ -0,0 +1,38 @@ + + +import socket +import time + +def measure_bandwidth(connection): + start_time = time.time() + total_bytes = 0 + + while True: + data = connection.recv(1024) # 接收数据 + if not data: + break + total_bytes += len(data) + + end_time = time.time() + elapsed_time = end_time - start_time + bandwidth_mbps = (total_bytes * 8) / (elapsed_time * 1e6) # 计算带宽,单位为 Mbps + return bandwidth_mbps, elapsed_time + +def task(): + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_address = ('', 9999) # 空字符串表示监听所有可用的接口 + server_socket.bind(server_address) + server_socket.listen(1) + while True: + print("等待客户端连接...") + client_connection, client_address = server_socket.accept() + print(f"客户端 {client_address} 已连接") + + bandwidth_mbps, elapsed_time 
= measure_bandwidth(client_connection) + print(f"测量完成:实际可用带宽为 {bandwidth_mbps:.2f} Mbps,传输时间为 {elapsed_time:.2f} 秒") + + client_connection.close() + server_socket.close() + +if __name__ == "__main__": + task() diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/test_split.py b/subject1-4/dynamicSplit/02DiffAD-main_low/test_split.py new file mode 100644 index 0000000000000000000000000000000000000000..ac406b3a3e8df1c796ef4a13e12a3f725380d668 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/test_split.py @@ -0,0 +1,164 @@ +import argparse +import logging +import os +import time + +import pandas as pd +import torch +from tensorboardX import SummaryWriter + +import core.logger as Logger +import core.metrics as Metrics +import data as Data +import model as Model +from decimal import Decimal +import sys + +import socket +import random +import string +import time + +def time_test(params, strategy_params, temp_list): + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + opt = params['opt'] + logger = params['logger'] + logger_test = params['logger_test'] + model_epoch = params['model_epoch'] + + diffusion = Model.create_model(opt) + logger.info('Initial Model Finished') + + current_step = diffusion.begin_step + current_epoch = diffusion.begin_epoch + + if opt['path']['resume_state']: + logger.info('Resuming training from epoch: {}, iter: {}.'.format( + current_epoch, current_step)) + + diffusion.set_new_noise_schedule( + opt['model']['beta_schedule'][opt['phase']], schedule_phase=opt['phase']) + + logger.info('Begin Model Evaluation.') + idx = 0 + + all_datas = pd.DataFrame() + sr_datas = pd.DataFrame() + differ_datas = pd.DataFrame() + + result_path = '{}'.format(opt['path']['results']) + os.makedirs(result_path, exist_ok=True) + + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#news + + #todo 设置高性能云的 IP 地址和端口号 x.x.x.x 9999 + server_address = ('x.x.x.x', 9999) + client_socket = socket.socket(socket.AF_INET, 
socket.SOCK_STREAM) #kk + client_socket.connect(server_address) #kk + + for _, test_data in enumerate(test_loader): + # print(test_data['ORI'].shape) + # print(test_data['HR'].shape) + # print(test_data['SR'].shape) + # print(test_data['label'].shape) + # sys.exit (0) + idx += 1 + diffusion.feed_data(test_data) + diffusion.test(client_socket,continous=False) + + + + visuals = diffusion.get_current_visuals() + + all_data, sr_df, differ_df = Metrics.tensor2allcsv(visuals, params['col_num']) + all_datas = Metrics.merge_all_csv(all_datas, all_data) + sr_datas = Metrics.merge_all_csv(sr_datas, sr_df) + differ_datas = Metrics.merge_all_csv(differ_datas, differ_df) + + client_socket.close() # kk + os.system("pause") # kk + + # print(idx) + # sys.exit (0) + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#new + all_datas = all_datas.reset_index(drop=True) + sr_datas = sr_datas.reset_index(drop=True) + differ_datas = differ_datas.reset_index(drop=True) + + for i in range(params['row_num'], all_datas.shape[0]): + all_datas.drop(index=[i], inplace=True) + sr_datas.drop(index=[i], inplace=True) + differ_datas.drop(index=[i], inplace=True) + + f1,accuracy,precision,recall = Metrics.relabeling_strategy(all_datas, strategy_params) + + temp_f1 = Decimal(f1).quantize(Decimal("0.0000")) + temp_acc = Decimal(accuracy).quantize(Decimal("0.0000")) + temp_prec = Decimal(precision).quantize(Decimal("0.0000")) + temp_rec = Decimal(recall).quantize(Decimal("0.0000")) + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#new + print('F1-score: ', float(temp_f1)) + print('precision: ', float(temp_prec)) + print('accuracy: ', float(temp_acc)) + print('recall: ', float(temp_rec)) + + +# evaluate model performance +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config', type=str, default='./config/smd_time_test.json', + help='JSON file for configuration') + #//02DiffAD-main/config/smd_time_test.json + 
parser.add_argument('-p', '--phase', type=str, choices=['train ', 'val', 'test'], + help='Run either train(training) or val(generation)', default='test') + parser.add_argument('-gpu', '--gpu_ids', type=str, default=None) + parser.add_argument('-debug', '-d', action='store_true') + parser.add_argument('-enable_wandb', action='store_true') + parser.add_argument('-log_wandb_ckpt', action='store_true') + parser.add_argument('-log_eval', action='store_true') + + temp_list = [] + model_epoch = 100 + + # parse configs + args = parser.parse_args() + opt = Logger.parse(args, model_epoch) + # Convert to NoneDict, which return None for missing key. + opt = Logger.dict_to_nonedict(opt) + logger_name = 'test' + str(model_epoch) + # logging + Logger.setup_logger(logger_name, opt['path']['log'], 'test', level=logging.INFO) + logger = logging.getLogger('base') + logger.info(Logger.dict2str(opt)) + tb_logger = SummaryWriter(log_dir=opt['path']['tb_logger']) + + #开始测试 + test_set = Data.create_dataset(opt['datasets']['test'], 'test') + + test_loader = Data.create_dataloader(test_set, opt['datasets']['test'], 'test') + logger.info('Initial Dataset Finished') + logger_test = logging.getLogger(logger_name) # test logger + + start_label = opt['model']['beta_schedule']['test']['start_label'] + end_label = opt['model']['beta_schedule']['test']['end_label'] + step_label = opt['model']['beta_schedule']['test']['step_label'] + step_t = opt['model']['beta_schedule']['test']['step_t'] + strategy_params = { + 'start_label': start_label, + 'end_label': end_label, + 'step_label': step_label, + 'step_t': step_t + } + + params = { + 'opt': opt, + 'logger': logger, + 'logger_test': logger_test, + 'model_epoch': model_epoch, + 'row_num': test_set.row_num, + 'col_num': test_set.col_num + } + + time_test(params, strategy_params, temp_list) + logging.shutdown() diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/time_test.py b/subject1-4/dynamicSplit/02DiffAD-main_low/time_test.py new file mode 100644 
index 0000000000000000000000000000000000000000..c4f90530e886721500c7f9e4e19184adee962a3b --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/time_test.py @@ -0,0 +1,133 @@ +import argparse +import logging +import os +import time + +import pandas as pd +import torch +from tensorboardX import SummaryWriter + +import core.logger as Logger +import core.metrics as Metrics +import data as Data +import model as Model +from decimal import Decimal + + +def time_test(params, strategy_params, temp_list): + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + opt = params['opt'] + logger = params['logger'] + logger_test = params['logger_test'] + model_epoch = params['model_epoch'] + + diffusion = Model.create_model(opt) + logger.info('Initial Model Finished') + + current_step = diffusion.begin_step + current_epoch = diffusion.begin_epoch + + if opt['path']['resume_state']: + logger.info('Resuming training from epoch: {}, iter: {}.'.format( + current_epoch, current_step)) + + diffusion.set_new_noise_schedule( + opt['model']['beta_schedule'][opt['phase']], schedule_phase=opt['phase']) + + logger.info('Begin Model Evaluation.') + idx = 0 + + all_datas = pd.DataFrame() + sr_datas = pd.DataFrame() + differ_datas = pd.DataFrame() + + result_path = '{}'.format(opt['path']['results']) + os.makedirs(result_path, exist_ok=True) + + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#news + for _, test_data in enumerate(test_loader): + idx += 1 + diffusion.feed_data(test_data) + diffusion.test(continous=False) + visuals = diffusion.get_current_visuals() + + all_data, sr_df, differ_df = Metrics.tensor2allcsv(visuals, params['col_num']) + all_datas = Metrics.merge_all_csv(all_datas, all_data) + sr_datas = Metrics.merge_all_csv(sr_datas, sr_df) + differ_datas = Metrics.merge_all_csv(differ_datas, differ_df) + + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#news + all_datas = all_datas.reset_index(drop=True) + 
sr_datas = sr_datas.reset_index(drop=True) + differ_datas = differ_datas.reset_index(drop=True) + + for i in range(params['row_num'], all_datas.shape[0]): + all_datas.drop(index=[i], inplace=True) + sr_datas.drop(index=[i], inplace=True) + differ_datas.drop(index=[i], inplace=True) + + f1 = Metrics.relabeling_strategy(all_datas, strategy_params) + temp_f1 = Decimal(f1) + # temp_f1 = Decimal(f1).quantize(Decimal("0.0000")) + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#new + print('F1-score: ', float(temp_f1)) + + +# evaluate model performance +if __name__ == '__main__': + print(time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))#news + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config', type=str, default='//02DiffAD-main/config/msl_time_test.json', + help='JSON file for configuration') + parser.add_argument('-p', '--phase', type=str, choices=['train ', 'val', 'test'], + help='Run either train(training) or val(generation)', default='test') + parser.add_argument('-gpu', '--gpu_ids', type=str, default=None) + parser.add_argument('-debug', '-d', action='store_true') + parser.add_argument('-enable_wandb', action='store_true') + parser.add_argument('-log_wandb_ckpt', action='store_true') + parser.add_argument('-log_eval', action='store_true') + + temp_list = [] + model_epoch = 100 + + # parse configs + args = parser.parse_args() + opt = Logger.parse(args, model_epoch) + # Convert to NoneDict, which return None for missing key. 
+ opt = Logger.dict_to_nonedict(opt) + logger_name = 'test' + str(model_epoch) + # logging + Logger.setup_logger(logger_name, opt['path']['log'], 'test', level=logging.INFO) + logger = logging.getLogger('base') + logger.info(Logger.dict2str(opt)) + tb_logger = SummaryWriter(log_dir=opt['path']['tb_logger']) + + test_set = Data.create_dataset(opt['datasets']['test'], 'test') + + test_loader = Data.create_dataloader(test_set, opt['datasets']['test'], 'test') + logger.info('Initial Dataset Finished') + logger_test = logging.getLogger(logger_name) # test logger + + start_label = opt['model']['beta_schedule']['test']['start_label'] + end_label = opt['model']['beta_schedule']['test']['end_label'] + step_label = opt['model']['beta_schedule']['test']['step_label'] + step_t = opt['model']['beta_schedule']['test']['step_t'] + strategy_params = { + 'start_label': start_label, + 'end_label': end_label, + 'step_label': step_label, + 'step_t': step_t + } + + params = { + 'opt': opt, + 'logger': logger, + 'logger_test': logger_test, + 'model_epoch': model_epoch, + 'row_num': test_set.row_num, + 'col_num': test_set.col_num + } + + time_test(params, strategy_params, temp_list) + logging.shutdown() diff --git a/subject1-4/dynamicSplit/02DiffAD-main_low/time_train.py b/subject1-4/dynamicSplit/02DiffAD-main_low/time_train.py new file mode 100644 index 0000000000000000000000000000000000000000..a1f87e3eb4862cbc7f7a5fea1744280a00aa1d69 --- /dev/null +++ b/subject1-4/dynamicSplit/02DiffAD-main_low/time_train.py @@ -0,0 +1,87 @@ +import argparse +import logging +import math + +import torch +from tensorboardX import SummaryWriter + +import core.logger as Logger +import data as Data +import model as Model + +# train model +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config', type=str, default='//02DiffAD-main/config/smd_time_train.json', + help='JSON file for configuration') + parser.add_argument('-p', '--phase', type=str, choices=['train', 
'val'], + help='Run either train(training) or val(generation)', default='train') + parser.add_argument('-gpu', '--gpu_ids', type=str, default=None) + parser.add_argument('-debug', '-d', action='store_true') + parser.add_argument('-enable_wandb', action='store_true') + parser.add_argument('-log_wandb_ckpt', action='store_true') + parser.add_argument('-log_eval', action='store_true') + + # parse configs + args = parser.parse_args() + opt = Logger.parse(args) + # Convert to NoneDict, which return None for missing key. + opt = Logger.dict_to_nonedict(opt) + + # logging + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + + Logger.setup_logger(None, opt['path']['log'], + 'train', level=logging.INFO, screen=True) + Logger.setup_logger('val', opt['path']['log'], 'val', level=logging.INFO) + logger = logging.getLogger('base') + logger.info(Logger.dict2str(opt)) + tb_logger = SummaryWriter(log_dir=opt['path']['tb_logger']) + + for phase, dataset_opt in opt['datasets'].items(): + if phase == 'train' and args.phase != 'val': + train_set = Data.create_dataset(dataset_opt, phase) + train_loader = Data.create_dataloader(train_set, dataset_opt, phase) + + logger.info('Initial Dataset Finished') + + diffusion = Model.create_model(opt) + logger.info('Initial Model Finished') + + current_step = diffusion.begin_step + current_epoch = diffusion.begin_epoch + n_epoch = opt['train']['n_epoch'] + + if opt['path']['resume_state']: + logger.info('Resuming training from epoch: {}, iter: {}.'.format( + current_epoch, current_step)) + + diffusion.set_new_noise_schedule( + opt['model']['beta_schedule'][opt['phase']], schedule_phase=opt['phase']) + + save_model_iter = math.ceil(train_set.__len__() / opt['datasets']['train']['batch_size']) + while current_epoch < n_epoch: + current_epoch += 1 + for _, train_data in enumerate(train_loader): + current_step += 1 + if current_epoch > n_epoch: + break + diffusion.feed_data(train_data) + diffusion.optimize_parameters() + # 
log + if current_epoch % opt['train']['print_freq'] == 0 and current_step % save_model_iter == 0: + logs = diffusion.get_current_log() + message = ' '.format( + current_epoch, current_step) + for k, v in logs.items(): + message += '{:s}: {:.4e} '.format(k, v) + tb_logger.add_scalar(k, v, current_step) + logger.info(message) + + # save model + if current_epoch % opt['train']['save_checkpoint_freq'] == 0 and current_step % save_model_iter == 0: + logger.info('Saving models and training states.') + diffusion.save_network(current_epoch, current_step) + + logger.info('End of training.') diff --git a/subject1-4/dynamicSplit/README.md b/subject1-4/dynamicSplit/README.md new file mode 100644 index 0000000000000000000000000000000000000000..28b2fe53a88382e9415d295af20a980f85975765 --- /dev/null +++ b/subject1-4/dynamicSplit/README.md @@ -0,0 +1,69 @@ +# DynamicSplit + + +## Datasets + +1. MSL (Mars Science Laboratory rover) is a public dataset from NASA. You can learn about it + from [Detecting Spacecraft Anomalies Using LSTMs and Nonparametric Dynamic Thresholding](https://arxiv.org/pdf/1802.04431.pdf). +2. SMAP (Soil Moisture Active Passive satellite) is also a public dataset from NASA. You can learn about it + from [Detecting Spacecraft Anomalies Using LSTMs and Nonparametric Dynamic Thresholding](https://arxiv.org/pdf/1802.04431.pdf). +3. SMD (Server Machine Dataset) is a 5-week-long dataset collected from a large Internet company. You can learn about it + from [Robust Anomaly Detection for Multivariate Time Series through Stochastic Recurrent Neural Network](https://netman.aiops.org/wp-content/uploads/2019/08/OmniAnomaly_camera-ready.pdf). +Please download the datasets mentioned above and place them in the `tf_dataset` folder, e.g., `tf_dataset/msl/msl_test.csv`. + +## DiffAD-Usage + +The DiffAD model was proposed in the original paper, *Imputation-based Time-Series Anomaly Detection with Conditional Weight-Incremental Diffusion Models*. 
+### Environment + +Install Python 3.8. + +```bash +pip install -r requirements.txt +``` + +By default, datasets are placed under the "tf_dataset" folder. If you need to change +the dataset, you can modify the dataset path in the JSON file in the "config" folder. +Here is an example of modifying the training dataset path: + +```json +"datasets": { + "train|test": { + "dataroot": "tf_dataset/smap/smap_train.csv", + //"dataroot": "tf_dataset/swat/swat_train.csv" + } +}, +``` +In addition, we provide JSON configuration files for the SMAP dataset for reference. + +### Training +Next, we demonstrate training and testing using the SMAP dataset. + +#### We use the SMAP dataset to demonstrate training. + +```bash +# Use time_train.py to train the model. +# Edit the JSON files to adjust the dataset path, network structure and hyperparameters. +python time_train.py -c config/smap_time_train.json +``` + +### Test +The trained model is placed in "experiments/*/checkpoint/" by default. +If you need to modify this path, you can refer to "config/smap_time_test.json": + +```json +"path": { + "resume_state": "experiments/SMAP_TRAIN_128_2048_100/checkpoint/E100" +}, +``` + +#### We also use the SMAP dataset to demonstrate testing. + +```bash +# Edit the JSON file to adjust the pretrained model path and the dataset path. +python time_test.py -c config/smap_time_test.json +``` + + +## DynamicSplit-Usage +The `DiffAD-main_low` and `DiffAD-main_high` folders should be placed on the low-performing cloud and high-performing cloud, respectively. `test_split.py` shows an example of computational splitting. \ No newline at end of file