from sklearn.datasets import load_breast_cancer, load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
#from xgboost import XGBClassifier


# ======================================================
# STEP 1: Dataset Load
# ======================================================
# If you have your own CSV, replace the load_breast_cancer() call below
# with pd.read_csv("file.csv") and build X (features) and y (labels) from
# the resulting DataFrame.

# Load the bundled dataset object, then pull out the feature matrix and
# the label vector.
dataset = load_breast_cancer()
# dataset = load_iris()   # <-- use this instead if you want a different dataset
X, y = dataset.data, dataset.target


# ======================================================
# STEP 2: Train-Test Split
# ======================================================
# Hold out 20% of the samples for testing; the fixed random_state makes
# the shuffle (and therefore the split) repeatable between runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.2)


# ======================================================
# STEP 3: Feature Scaling
# ======================================================
# Learn the scaling statistics from the training split only, then apply
# that same fitted scaler to both splits so no test information is used
# during fitting.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


# ======================================================
# STEP 4: Model Select (keep the one you want active, comment out the rest)
# ======================================================

clf = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)

# Alternatives -- uncomment exactly one line to replace the model above.
# NOTE: LinearRegression is a regressor, so its score() reports R^2 rather
# than accuracy. XGBClassifier additionally needs the xgboost import near
# the top of the file to be uncommented.
# clf = LinearRegression()
# clf = LogisticRegression(max_iter=1000)
# clf = DecisionTreeClassifier(random_state=42)
# clf = GaussianNB()
# clf = SVC(kernel="rbf", probability=True, random_state=42)
# clf = GradientBoostingClassifier(random_state=42)
# clf = XGBClassifier(eval_metric="logloss", random_state=42)


# ======================================================
# STEP 5: Train (fit) Model
# ======================================================
# Fit the selected estimator on the scaled training features and their labels.
clf.fit(X=X_train_scaled, y=y_train)


# ======================================================
# STEP 6: Evaluation (score)
# ======================================================
# Report which estimator class was trained, then its score on the held-out
# (scaled) test split.
algorithm_name = clf.__class__.__name__
print("Algorithm Used:", algorithm_name)

test_score = clf.score(X_test_scaled, y_test)
print("Test Accuracy:", test_score)


# ======================================================
# STEP 7: Single Instance Prediction (as demonstrated in the video)
# ======================================================
# Position (not index label) of the test sample to inspect; used for both
# the features and the true label so they always refer to the same row.
sample_idx = 1

single_instance = X_test_scaled[sample_idx]
print("\nSingle instance:\n", single_instance)

# predict() expects a 2-D batch, so wrap the single sample in a list.
prediction = clf.predict([single_instance])
print("\nPredicted label:", prediction)

# y_test is a NumPy array for the bundled datasets, but it is a pandas
# Series when y was built from a CSV. Integer indexing on a Series is
# label-based, and the labels are shuffled by the split, so y_test[1] could
# raise KeyError or return a different row. Use positional .iloc when it
# exists.
if hasattr(y_test, "iloc"):
    actual_label = y_test.iloc[sample_idx]
else:
    actual_label = y_test[sample_idx]
print("Actual label:", actual_label)

# Not every estimator exposes class probabilities; print them only when
# the selected model provides predict_proba.
if hasattr(clf, "predict_proba"):
    print("Predicted probabilities:", clf.predict_proba([single_instance]))