Data science project

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import roc_curve, auc

# Fetch the covtype dataset and unpack the feature matrix and label vector.
covtype = fetch_covtype()
X = covtype.data
y = covtype.target

# Reduce the task to a binary problem by keeping only samples labelled 1 or 2.
# Rows for class 1 come first, followed by rows for class 2, each group in its
# original dataset order (assumes y is one-dimensional — TODO confirm).
kept_labels = (1, 2)
selected_indices = np.concatenate(
    [np.flatnonzero(y == label) for label in kept_labels]
)
X_binary = X[selected_indices]
y_binary = y[selected_indices]

# Hold out 20% of the binary subset for evaluation; the fixed seed keeps the
# shuffle reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_binary,
    y_binary,
    test_size=0.2,
    random_state=42,
)

# Train AdaBoost with at most 50 estimators on the training portion. fit()
# returns the estimator itself, so construction and fitting are chained.
ada_boost_classifier = AdaBoostClassifier(
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)

# Score each test sample with the predicted probability of the positive class
# (label 2). predict_proba orders its columns like `classes_`, so the column
# is looked up by label instead of assuming it is the second one.
positive_label = 2
positive_column = list(ada_boost_classifier.classes_).index(positive_label)
y_scores = ada_boost_classifier.predict_proba(X_test)[:, positive_column]

# Compute ROC curve and AUC. The labels here are {1, 2} rather than {0, 1} or
# {-1, 1}, so roc_curve cannot infer which one is positive and raises a
# ValueError unless pos_label is passed explicitly.
fpr, tpr, thresholds = roc_curve(y_test, y_scores, pos_label=positive_label)
roc_auc = auc(fpr, tpr)

# Draw the ROC curve on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(8, 6))
roc_label = f'ROC curve (AUC = {roc_auc:.2f})'
ax.plot(fpr, tpr, color='darkorange', lw=2, label=roc_label)
# Dashed reference diagonal from (0, 0) to (1, 1); it has no label, so it
# does not appear in the legend.
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve for AdaBoost')
ax.legend(loc='lower right')
plt.show()
Recent Posts

Recent Comments

Archives

Categories