import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
# Load dataset
df = pd.read_csv("RT_IOT2022.csv", encoding="latin1")  # adjust the filename/extension to match your local copy
X = df.drop(columns=["Attack_type"])
y = df["Attack_type"]
# One-hot encode the categorical features
X = pd.get_dummies(X, columns=["proto", "service"])
# Scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# Encode target
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
# Cross-validation setup
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Metric storage
accuracy_list = []
f1_macro_list = []
all_f1 = []
all_precision = []
all_recall = []
fold = 1
for train_idx, test_idx in skf.split(X_scaled, y_encoded):
    print(f"\n--- Fold {fold} ---")
    # Split
    X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
    y_train, y_test = y_encoded[train_idx], y_encoded[test_idx]
    # SMOTE oversampling
    sm = SMOTE(random_state=42)
    X_train_bal, y_train_bal = sm.fit_resample(X_train, y_train)
    # Train MLPClassifier
    model = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=300, random_state=42)
    model.fit(X_train_bal, y_train_bal)
    # Predict
    y_pred = model.predict(X_test)
    # Metrics
    acc = accuracy_score(y_test, y_pred)
    f1_macro = f1_score(y_test, y_pred, average="macro")
    f1_each = f1_score(y_test, y_pred, average=None)
    precision_each = precision_score(y_test, y_pred, average=None)
    recall_each = recall_score(y_test, y_pred, average=None)
    # Save
    accuracy_list.append(acc)
    f1_macro_list.append(f1_macro)
    all_f1.append(f1_each)
    all_precision.append(precision_each)
    all_recall.append(recall_each)
    print(f"Accuracy: {acc:.4f} | F1 (macro): {f1_macro:.4f}")
    print("Class\t\t\tF1\t\tPrecision\tRecall")
    for idx, cname in enumerate(label_encoder.classes_):
        print(f"{cname:20s}\t{f1_each[idx]:.4f}\t\t{precision_each[idx]:.4f}\t\t{recall_each[idx]:.4f}")
    fold += 1
# Overall summary
print("\n==== Summary Results ====\n")
print(f"Accuracy (average): {np.mean(accuracy_list):.4f}")
print(f"F1-score (macro average): {np.mean(f1_macro_list):.4f}")
all_f1 = np.vstack(all_f1)
all_precision = np.vstack(all_precision)
all_recall = np.vstack(all_recall)
print("\nClass\t\t\tF1 (avg)\tPrecision (avg)\tRecall (avg)")
for idx, cname in enumerate(label_encoder.classes_):
    print(f"{cname:20s}\t{np.mean(all_f1[:, idx]):.4f}\t\t{np.mean(all_precision[:, idx]):.4f}\t\t{np.mean(all_recall[:, idx]):.4f}")