fork(1) download
  1. import pandas as pd
  2. import numpy as np
  3. from sklearn.model_selection import StratifiedKFold
  4. from sklearn.preprocessing import StandardScaler, LabelEncoder
  5. from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
  6. from sklearn.neural_network import MLPClassifier
  7. from imblearn.over_sampling import SMOTE
  8.  
  9. # Load dataset
  10. df = pd.read_csv("RT_IOT2022.csv", encoding="latin1") # เปลี่ยนนามสกุลให้ตรง
  11. X = df.drop(columns=["Attack_type"])
  12. y = df["Attack_type"]
  13.  
  14. # One-hot encoding สำหรับ categorical features
  15. X = pd.get_dummies(X, columns=["proto", "service"])
  16.  
  17. # Scaling
  18. scaler = StandardScaler()
  19. X_scaled = scaler.fit_transform(X)
  20.  
  21. # Encode target
  22. label_encoder = LabelEncoder()
  23. y_encoded = label_encoder.fit_transform(y)
  24.  
  25. # Cross-validation setup
  26. skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
  27.  
  28. # Metric storage
  29. accuracy_list = []
  30. f1_macro_list = []
  31. all_f1 = []
  32. all_precision = []
  33. all_recall = []
  34.  
  35. fold = 1
  36.  
  37. for train_idx, test_idx in skf.split(X_scaled, y_encoded):
  38. print(f"\n--- Fold {fold} ---")
  39.  
  40. # Split
  41. X_train, X_test = X_scaled[train_idx], X_scaled[test_idx]
  42. y_train, y_test = y_encoded[train_idx], y_encoded[test_idx]
  43.  
  44. # SMOTE oversampling
  45. sm = SMOTE(random_state=42)
  46. X_train_bal, y_train_bal = sm.fit_resample(X_train, y_train)
  47.  
  48. # Train MLPClassifier
  49. model = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=300, random_state=42)
  50. model.fit(X_train_bal, y_train_bal)
  51.  
  52. # Predict
  53. y_pred = model.predict(X_test)
  54.  
  55. # Metrics
  56. acc = accuracy_score(y_test, y_pred)
  57. f1_macro = f1_score(y_test, y_pred, average="macro")
  58. f1_each = f1_score(y_test, y_pred, average=None)
  59. precision_each = precision_score(y_test, y_pred, average=None)
  60. recall_each = recall_score(y_test, y_pred, average=None)
  61.  
  62. # Save
  63. accuracy_list.append(acc)
  64. f1_macro_list.append(f1_macro)
  65. all_f1.append(f1_each)
  66. all_precision.append(precision_each)
  67. all_recall.append(recall_each)
  68.  
  69. print(f"Accuracy: {acc:.4f} | F1 (macro): {f1_macro:.4f}")
  70. print("Class\t\t\tF1\t\tPrecision\tRecall")
  71. for idx, cname in enumerate(label_encoder.classes_):
  72. print(f"{cname:20s}\t{f1_each[idx]:.4f}\t\t{precision_each[idx]:.4f}\t\t{recall_each[idx]:.4f}")
  73.  
  74. fold += 1
  75.  
  76. # สรุปผลรวม
  77. print("\n==== Summary Results ====\n")
  78. print(f"Accuracy (average): {np.mean(accuracy_list):.4f}")
  79. print(f"F1-score (macro average): {np.mean(f1_macro_list):.4f}")
  80.  
  81. all_f1 = np.vstack(all_f1)
  82. all_precision = np.vstack(all_precision)
  83. all_recall = np.vstack(all_recall)
  84.  
  85. print("\nClass\t\t\tF1 (avg)\tPrecision (avg)\tRecall (avg)")
  86. for idx, cname in enumerate(label_encoder.classes_):
  87. print(f"{cname:20s}\t{np.mean(all_f1[:, idx]):.4f}\t\t{np.mean(all_precision[:, idx]):.4f}\t\t{np.mean(all_recall[:, idx]):.4f}")
Success #stdin #stdout 0.02s 25500KB
stdin
Standard input is empty
stdout
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
 
# ---------------------------------------------------------------------------
# Stratified 5-fold evaluation of an MLP classifier on RT_IOT2022.csv.
#
# Per fold: fit the scaler on the training rows only, oversample the training
# rows with SMOTE, train the MLP, then score on the untouched test rows.
# Results are accumulated in accuracy_list, f1_macro_list, all_f1,
# all_precision and all_recall for the summary printed after the loop.
# ---------------------------------------------------------------------------

# Load dataset (adjust the file name/extension to match the local copy).
df = pd.read_csv("RT_IOT2022.csv", encoding="latin1")
X = df.drop(columns=["Attack_type"])
y = df["Attack_type"]

# One-hot encode the categorical features.
X = pd.get_dummies(X, columns=["proto", "service"])

# Keep the features unscaled here. Scaling is fitted inside every fold so that
# the mean/variance of the held-out rows never influences training
# (fitting the scaler on the full dataset leaks test information).
X_values = X.to_numpy(dtype=np.float64)

# Encode target labels as integers 0..n_classes-1.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Explicit label list for the per-class metrics: guarantees that every
# per-class array has one entry per class, in label_encoder.classes_ order,
# even if some class is missing from a fold's test split and predictions.
class_ids = np.arange(len(label_encoder.classes_))

# Cross-validation setup
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Metric storage: one scalar per fold ...
accuracy_list = []
f1_macro_list = []
# ... and one per-class array per fold.
all_f1 = []
all_precision = []
all_recall = []

for fold, (train_idx, test_idx) in enumerate(skf.split(X_values, y_encoded), start=1):
    print(f"\n--- Fold {fold} ---")

    # Split
    X_train_raw, X_test_raw = X_values[train_idx], X_values[test_idx]
    y_train, y_test = y_encoded[train_idx], y_encoded[test_idx]

    # Scaling: statistics come from the training rows only and are then
    # applied unchanged to the test rows.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train_raw)
    X_test = scaler.transform(X_test_raw)

    # SMOTE oversampling, applied to the training rows only so the test rows
    # keep their original class distribution.
    sm = SMOTE(random_state=42)
    X_train_bal, y_train_bal = sm.fit_resample(X_train, y_train)

    # Train MLPClassifier
    model = MLPClassifier(hidden_layer_sizes=(128, 64), max_iter=300, random_state=42)
    model.fit(X_train_bal, y_train_bal)

    # Predict
    y_pred = model.predict(X_test)

    # Metrics. zero_division=0 keeps the value scikit-learn already uses for
    # undefined scores but makes it explicit and silences the warning.
    acc = accuracy_score(y_test, y_pred)
    f1_macro = f1_score(y_test, y_pred, average="macro", zero_division=0)
    f1_each = f1_score(y_test, y_pred, average=None, labels=class_ids, zero_division=0)
    precision_each = precision_score(y_test, y_pred, average=None, labels=class_ids, zero_division=0)
    recall_each = recall_score(y_test, y_pred, average=None, labels=class_ids, zero_division=0)

    # Save
    accuracy_list.append(acc)
    f1_macro_list.append(f1_macro)
    all_f1.append(f1_each)
    all_precision.append(precision_each)
    all_recall.append(recall_each)

    # Per-fold report: overall scores followed by one row per class.
    print(f"Accuracy: {acc:.4f} | F1 (macro): {f1_macro:.4f}")
    print("Class\t\t\tF1\t\tPrecision\tRecall")
    for idx, cname in enumerate(label_encoder.classes_):
        print(f"{cname:20s}\t{f1_each[idx]:.4f}\t\t{precision_each[idx]:.4f}\t\t{recall_each[idx]:.4f}")
 
# Overall summary: averages of the per-fold results collected above.
for summary_line in (
    "\n==== Summary Results ====\n",
    f"Accuracy (average): {np.mean(accuracy_list):.4f}",
    f"F1-score (macro average): {np.mean(f1_macro_list):.4f}",
):
    print(summary_line)

# Stack the per-fold arrays into 2-D arrays: one row per fold, one column per class.
all_f1, all_precision, all_recall = (
    np.vstack(per_fold) for per_fold in (all_f1, all_precision, all_recall)
)

# One row per class with its fold-averaged F1, precision and recall.
print("\nClass\t\t\tF1 (avg)\tPrecision (avg)\tRecall (avg)")
for col, class_name in enumerate(label_encoder.classes_):
    mean_f1 = np.mean(all_f1[:, col])
    mean_precision = np.mean(all_precision[:, col])
    mean_recall = np.mean(all_recall[:, col])
    print(f"{class_name:20s}\t{mean_f1:.4f}\t\t{mean_precision:.4f}\t\t{mean_recall:.4f}")