1.1 Linear regression with two variables (with built-in function)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
file_path = r'C:/Users/abhil/Desktop/ML LAB/Boston.csv'
data = pd.read_csv(file_path)
X = data[['rm']]   # feature: average number of rooms per dwelling
y = data['medv']   # target: median home value (in $1000s)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
plt.scatter(X_test, y_test, color='blue', label='Actual')
plt.plot(X_test, y_pred, color='red', label='Predicted')
plt.title('Linear Regression: Actual vs Predicted (Boston Housing Dataset)')
plt.xlabel('Average Number of Rooms (rm)')
plt.ylabel('Median Value of Homes (medv)')
plt.legend()
plt.show()
print("Coefficient (slope):", model.coef_)
print("Intercept:", model.intercept_)
1.2 Linear regression with two variables (without built-in function)
import pandas as pd
import matplotlib.pyplot as plt
file_path = r'C:/Users/abhil/Desktop/ML LAB/Boston.csv'
data = pd.read_csv(file_path)
X = data['rm']
y = data['medv']
mean_x = X.mean()
mean_y = y.mean()
# Ordinary least squares for a single feature:
# slope = sum((x - x_mean) * (y - y_mean)) / sum((x - x_mean)^2)
numerator = sum((X - mean_x) * (y - mean_y))
denominator = sum((X - mean_x) ** 2)
slope = numerator / denominator
intercept = mean_y - (slope * mean_x)
y_pred = intercept + slope * X
plt.scatter(X, y, color='blue', label='Actual')
plt.plot(X, y_pred, color='red', label='Predicted')
plt.title('Linear Regression: Actual vs Predicted (Boston Housing Dataset)')
plt.xlabel('Average Number of Rooms (rm)')
plt.ylabel('Median Value of Homes (medv)')
plt.legend()
plt.show()
print("Coefficient (slope):", slope)
print("Intercept:", intercept)
2.1 Multiple linear regression (with built-in function)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
file_path = 'C:/Users/abhil/Desktop/ML LAB/iris.csv'
df = pd.read_csv(file_path)
X = df[['sepal.length', 'sepal.width']]
y = df['petal.length']
model = LinearRegression()
kf = KFold(n_splits=3)
# The loop below overwrites test_indices on each pass, so only the LAST fold is evaluated
test_indices = None
for train_index, test_index in kf.split(X):
    test_indices = test_index
train_indices = np.array([i for i in range(len(X)) if i not in test_indices])
X_train, X_test = X.iloc[train_indices], X.iloc[test_indices]
y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R2 Score:", r2)
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X['sepal.length'], X['sepal.width'], y, color='blue', label='Data Points')
x_range = np.linspace(X['sepal.length'].min(), X['sepal.length'].max(), 10)
y_range = np.linspace(X['sepal.width'].min(), X['sepal.width'].max(), 10)
x_grid, y_grid = np.meshgrid(x_range, y_range)
z_grid = model.predict(np.column_stack((x_grid.ravel(), y_grid.ravel()))).reshape(x_grid.shape)
ax.plot_surface(x_grid, y_grid, z_grid, color='red', alpha=0.5)
ax.set_xlabel('Sepal Length')
ax.set_ylabel('Sepal Width')
ax.set_zlabel('Petal Length')
ax.set_title('3D Linear Regression of Petal Length on Sepal Length and Width')
plt.show()
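Note that the code above scores only the last of the three folds. To average the error over all folds instead, a short sketch using cross_val_score with the same splitter:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(LinearRegression(), X, y, cv=kf, scoring='neg_mean_squared_error')
print("MSE per fold:", -scores)
print("Mean CV MSE :", -scores.mean())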
2.2 Multiple linear regression (without built-in function)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
file_path = 'C:/Users/abhil/Desktop/ML LAB/iris.csv'
df = pd.read_csv(file_path)
X = df[['sepal.length', 'sepal.width']].values  # features
y = df['petal.length'].values                   # target
X_b = np.c_[np.ones(X.shape[0]), X]             # prepend a bias column of ones
test_indices = np.arange(50)                    # hold out the first 50 rows as the test set
train_indices = np.array([i for i in range(len(X)) if i not in test_indices])
X_train = X_b[train_indices]
y_train = y[train_indices]
theta_best = np.linalg.inv(X_train.T.dot(X_train)).dot(X_train.T).dot(y_train)  # normal equation: (X^T X)^-1 X^T y
X_test = X_b[test_indices]
y_pred = X_test.dot(theta_best)
mse = np.mean((y[test_indices] - y_pred) ** 2)
mae = np.mean(np.abs(y[test_indices] - y_pred))
ss_total = np.sum((y[test_indices] - np.mean(y[test_indices])) ** 2)
ss_residual = np.sum((y[test_indices] - y_pred) ** 2)
r2 = 1 - (ss_residual / ss_total)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R2 Score:", r2)
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(X[:, 0], X[:, 1], y, color='blue', label='Data Points')
x_range = np.linspace(X[:, 0].min(), X[:, 0].max(), 10)
y_range = np.linspace(X[:, 1].min(), X[:, 1].max(), 10)
x_grid, y_grid = np.meshgrid(x_range, y_range)
z_grid = theta_best[0] + theta_best[1] * x_grid + theta_best[2] * y_grid
ax.plot_surface(x_grid, y_grid, z_grid, color='red', alpha=0.5)
ax.set_xlabel('Sepal Length')
ax.set_ylabel('Sepal Width')
ax.set_zlabel('Petal Length')
ax.set_title('3D Linear Regression of Petal Length on Sepal Length and Width')
ax.legend()
plt.show()
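As a sanity check, the normal-equation coefficients can be compared with sklearn's solver on the same split; a minimal sketch:
from sklearn.linear_model import LinearRegression
check = LinearRegression().fit(X[train_indices], y[train_indices])
print("sklearn intercept:", check.intercept_)  # should match theta_best[0]
print("sklearn coefs    :", check.coef_)       # should match theta_best[1:]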
3.1 K-Means
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
np.random.seed(42)
num_points = 200
circle_1_center = (2, 3)
circle_1_radius = 1.5
circle_2_center = (4, 3)
circle_2_radius = 1.0
def generate_circle_data(center, radius, num_points):
    """Generate random points uniformly within a circle, plus a little Gaussian noise."""
    r = radius * np.sqrt(np.random.rand(num_points))  # sqrt gives uniform density over the disk
    theta = np.random.uniform(0, 2 * np.pi, num_points)  # random angle
    x = center[0] + r * np.cos(theta) + np.random.normal(scale=0.1, size=num_points)
    y = center[1] + r * np.sin(theta) + np.random.normal(scale=0.1, size=num_points)
    return np.column_stack((x, y))
points_circle_1 = generate_circle_data(circle_1_center, circle_1_radius, num_points // 2)
points_circle_2 = generate_circle_data(circle_2_center, circle_2_radius, num_points // 2)
X = np.vstack((points_circle_1, points_circle_2))
def filter_points(points, center, radius):
    """Keep only the points inside the circle defined by center and radius."""
    distances = np.linalg.norm(points - np.array(center), axis=1)
    return points[distances <= radius]
filtered_points_circle_1 = filter_points(points_circle_1, circle_1_center, circle_1_radius)
filtered_points_circle_2 = filter_points(points_circle_2, circle_2_center, circle_2_radius)
filtered_points = np.vstack((filtered_points_circle_1, filtered_points_circle_2))
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans_labels = kmeans.fit_predict(filtered_points)
kmeans_centers = kmeans.cluster_centers_
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.scatter(filtered_points_circle_1[:, 0], filtered_points_circle_1[:, 1], color='blue', alpha=0.6, label='Circle 1')
plt.scatter(filtered_points_circle_2[:, 0], filtered_points_circle_2[:, 1], color='orange', alpha=0.6, label='Circle 2')
plt.scatter(*circle_1_center, color='blue', edgecolor='black', s=100, label='Center 1')
plt.scatter(*circle_2_center, color='orange', edgecolor='black', s=100, label='Center 2')
plt.title('Original Data: Overlapping Circles')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.xlim(1, 5)
plt.ylim(1, 5)
plt.axis('equal')
plt.grid()
plt.legend()
plt.subplot(1, 2, 2)
plt.scatter(filtered_points[:, 0], filtered_points[:, 1], c=kmeans_labels, cmap='viridis', s=30)
plt.scatter(kmeans_centers[:, 0], kmeans_centers[:, 1], c='red', s=200, alpha=0.75, marker='X', label='Centroids')
plt.title('K-Means Clustering')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.xlim(1, 5)
plt.ylim(1, 5)
plt.axis('equal')
plt.grid()
plt.legend()
plt.tight_layout()
plt.show()
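Here k=2 is known in advance because the data was generated from two circles; on real unlabeled data, the elbow method is a common way to pick k. A sketch reusing filtered_points from above:
inertias = []
ks = range(1, 8)
for k in ks:
    inertias.append(KMeans(n_clusters=k, random_state=42, n_init=10).fit(filtered_points).inertia_)
plt.plot(ks, inertias, marker='o')
plt.xlabel('Number of clusters k')
plt.ylabel('Inertia (within-cluster SSE)')
plt.title('Elbow Method')
plt.show()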
3.2 Fuzzy C-Means
import numpy as np
import matplotlib.pyplot as plt
np.random.seed(42)
num_points = 200
circle_1_center = (2, 3)
circle_1_radius = 1.5
circle_2_center = (4, 3)
circle_2_radius = 1.0
def generate_circle_data(center, radius, num_points):
    """Generate random points uniformly within a circle, plus a little Gaussian noise."""
    r = radius * np.sqrt(np.random.rand(num_points))
    theta = np.random.uniform(0, 2 * np.pi, num_points)
    x = center[0] + r * np.cos(theta) + np.random.normal(scale=0.1, size=num_points)
    y = center[1] + r * np.sin(theta) + np.random.normal(scale=0.1, size=num_points)
    return np.column_stack((x, y))
points_circle_1 = generate_circle_data(circle_1_center, circle_1_radius, num_points // 2)
points_circle_2 = generate_circle_data(circle_2_center, circle_2_radius, num_points // 2)
X = np.vstack((points_circle_1, points_circle_2))
def filter_points(points, center, radius):
    """Keep only the points inside the circle defined by center and radius."""
    distances = np.linalg.norm(points - np.array(center), axis=1)
    return points[distances <= radius]
filtered_points_circle_1 = filter_points(points_circle_1, circle_1_center, circle_1_radius)
filtered_points_circle_2 = filter_points(points_circle_2, circle_2_center, circle_2_radius)
filtered_points = np.vstack((filtered_points_circle_1, filtered_points_circle_2))
# Minimal from-scratch fuzzy c-means (FCM): soft membership degrees
# instead of K-Means' hard assignments, as the section title calls for.
def fuzzy_c_means(points, n_clusters=2, m=2.0, max_iter=100, tol=1e-5, seed=42):
    """Fuzzy c-means: returns cluster centers and a membership matrix U (rows sum to 1)."""
    rng = np.random.default_rng(seed)
    U = rng.random((len(points), n_clusters))
    U = U / U.sum(axis=1, keepdims=True)              # random initial memberships
    for _ in range(max_iter):
        Um = U ** m                                    # fuzzified memberships
        centers = (Um.T @ points) / Um.sum(axis=0)[:, None]
        dist = np.linalg.norm(points[:, None, :] - centers[None, :, :], axis=2)
        dist = np.fmax(dist, 1e-10)                    # guard against division by zero
        inv = dist ** (-2.0 / (m - 1.0))
        U_new = inv / inv.sum(axis=1, keepdims=True)   # standard FCM membership update
        if np.linalg.norm(U_new - U) < tol:
            U = U_new
            break
        U = U_new
    return centers, U

fcm_centers, U = fuzzy_c_means(filtered_points, n_clusters=2)
fcm_labels = np.argmax(U, axis=1)                      # hard labels for plotting
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.scatter(filtered_points_circle_1[:, 0], filtered_points_circle_1[:, 1], color='blue', alpha=0.6, label='Circle 1')
plt.scatter(filtered_points_circle_2[:, 0], filtered_points_circle_2[:, 1], color='orange', alpha=0.6, label='Circle 2')
plt.scatter(*circle_1_center, color='blue', edgecolor='black', s=100, label='Center 1')
plt.scatter(*circle_2_center, color='orange', edgecolor='black', s=100, label='Center 2')
plt.title('Original Data: Overlapping Circles')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.xlim(1, 5)
plt.ylim(1, 5)
plt.axis('equal')
plt.grid()
plt.legend()
plt.subplot(1, 2, 2)
plt.scatter(filtered_points[:, 0], filtered_points[:, 1], c=fcm_labels, cmap='viridis', s=30)
plt.scatter(fcm_centers[:, 0], fcm_centers[:, 1], c='red', s=200, alpha=0.75, marker='X', label='Centers')
plt.title('Fuzzy C-Means Clustering')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')
plt.xlim(1, 5)
plt.ylim(1, 5)
plt.axis('equal')
plt.grid()
plt.legend()
plt.tight_layout()
plt.show()
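Unlike K-Means, fuzzy c-means assigns each point a degree of membership in every cluster, so points in the overlap region are split rather than forced into one side. A quick sketch to inspect the most ambiguous points via the membership matrix U from above:
ambiguity = 1 - np.abs(U[:, 0] - U[:, 1])  # 1 means split 50/50, 0 means fully committed
most_fuzzy = np.argsort(ambiguity)[-5:]    # five most ambiguous points
print("Most ambiguous points:\n", filtered_points[most_fuzzy])
print("Their memberships:\n", U[most_fuzzy])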
4 SVM
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import cross_validate, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from matplotlib.colors import ListedColormap
iris = datasets.load_iris()
X = iris.data[:, :2]  # use only sepal length and width so the decision boundary can be drawn in 2D
Y = iris.target
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)
svm_model = SVC(kernel='linear')
scoring = {
    'accuracy': make_scorer(accuracy_score),
    'precision_macro': make_scorer(precision_score, average='macro'),
    'recall_macro': make_scorer(recall_score, average='macro'),
    'f1_macro': make_scorer(f1_score, average='macro')
}
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_results = cross_validate(svm_model, X_normalized, Y, cv=kf, scoring=scoring)
print("5-Fold Cross-Validation Results:")
print(f"Accuracy: {np.mean(cv_results['test_accuracy']):.4f}")
print(f"Precision: {np.mean(cv_results['test_precision_macro']):.4f}")
print(f"Recall: {np.mean(cv_results['test_recall_macro']):.4f}")
print(f"F1-Score: {np.mean(cv_results['test_f1_macro']):.4f}")
svm_model.fit(X_normalized, Y)
x_min, x_max = X_normalized[:, 0].min() - 0.1, X_normalized[:, 0].max() + 0.1
y_min, y_max = X_normalized[:, 1].min() - 0.1, X_normalized[:, 1].max() + 0.1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01), np.arange(y_min, y_max, 0.01))
Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
cmap_light = ListedColormap(['white', 'black', 'yellow'])
cmap_bold = ['red', 'green', 'blue']
plt.figure(figsize=(10, 6))
plt.contourf(xx, yy, Z, cmap=cmap_light, alpha=0.6)
scatter = plt.scatter(X_normalized[:, 0], X_normalized[:, 1], c=Y, cmap=ListedColormap(cmap_bold), edgecolor='k', s=40)
plt.xlabel('Sepal Length (normalized)')
plt.ylabel('Sepal Width (normalized)')
plt.title('SVM Decision Boundaries on IRIS Dataset (Linear Kernel)')
plt.legend(handles=scatter.legend_elements()[0], labels=iris.target_names.tolist())
plt.show()
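The linear kernel is only one option; a sketch rerunning the same 5-fold evaluation with an RBF kernel for comparison:
rbf_model = SVC(kernel='rbf', gamma='scale')
rbf_results = cross_validate(rbf_model, X_normalized, Y, cv=kf, scoring=scoring)
print("RBF accuracy:", np.mean(rbf_results['test_accuracy']))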
5.1 Logistic regression (with built-in function)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, mean_absolute_error, mean_squared_error
# Load the Breast Cancer dataset from the provided file path
file_path= 'C:/Users/abhil/Downloads/breast_cancer.csv'
data = pd.read_csv(file_path)
# Preprocessing
data = data.drop(columns=['Unnamed: 32'], errors='ignore')  # drop the empty trailing column, if present
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0}) # Map M to 1 and B to 0
X = data[['radius_mean']] # Use radius_mean as the feature
y = data['diagnosis']
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Fit the Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)
# Make predictions
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]
# Calculate performance metrics
accuracy = accuracy_score(y_test, y_pred) * 100 # Convert to percentage
mae = mean_absolute_error(y_test, y_prob)
mse = mean_squared_error(y_test, y_prob)
# Print performance metrics
print("Accuracy: {:.2f}%".format(accuracy)) # Format to two decimal places
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
# Plot 1: Data points only
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(X_test, y_test, color='blue', label='Actual', alpha=0.5)
plt.xlabel('Radius Mean')
plt.ylabel('Diagnosis (0: Benign, 1: Malignant)')
plt.title('Data Points')
plt.legend()
# Plot 2: Data points and logistic regression line
plt.subplot(1, 2, 2)
plt.scatter(X_test, y_test, color='blue', label='Actual', alpha=0.5)
# Generate a range of values for the x-axis
x_range = np.linspace(X['radius_mean'].min(), X['radius_mean'].max(), 300).reshape(-1, 1)
x_range_df = pd.DataFrame(x_range, columns=['radius_mean']) # Convert to DataFrame with the correct column name
# Get predicted probabilities for those values
y_probs = model.predict_proba(x_range_df)[:, 1]
# Plot the logistic regression line
plt.plot(x_range, y_probs, color='red', label='Logistic Regression Line')
plt.xlabel('Radius Mean')
plt.ylabel('Probability of Malignancy')
plt.title('Logistic Regression Fit')
plt.ylim(-0.1, 1.1) # Set limits for y-axis to visualize probabilities
plt.legend()
plt.tight_layout()
plt.show()
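Accuracy, MAE and MSE are coarse summaries for a classifier; a confusion matrix and per-class report on the same test split show where the errors occur:
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred, target_names=['Benign', 'Malignant']))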
5.2 Logistic regression (without built-in function)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
file_path= 'C:/Users/abhil/Downloads/breast_cancer.csv'
data = pd.read_csv(file_path)
data = data.drop(columns=['Unnamed: 32'], errors='ignore')  # drop the empty trailing column, if present
data['diagnosis'] = data['diagnosis'].map({'M': 1, 'B': 0}) # Map M to 1 and B to 0
X = data[['radius_mean']].values # Feature
y = data['diagnosis'].values # Target
np.random.seed(42)
indices = np.random.permutation(len(X))
train_size = int(0.8 * len(X))
train_indices, test_indices = indices[:train_size], indices[train_size:]
X_train, X_test = X[train_indices], X[test_indices]
y_train, y_test = y[train_indices], y[test_indices]
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
class LogisticRegressionScratch:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        num_samples, num_features = X.shape
        self.weights = np.zeros(num_features)
        self.bias = 0
        for _ in range(self.num_iterations):
            linear_model = np.dot(X, self.weights) + self.bias
            y_predicted = sigmoid(linear_model)
            # gradients of the cross-entropy loss
            dw = (1 / num_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / num_samples) * np.sum(y_predicted - y)
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = sigmoid(linear_model)
        return np.where(y_predicted > 0.5, 1, 0)
model = LogisticRegressionScratch(learning_rate=0.01, num_iterations=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy = np.mean(y_pred == y_test) * 100
mae = np.mean(np.abs(y_pred - y_test))
mse = np.mean((y_pred - y_test) ** 2)
print("Accuracy: {:.2f}%".format(accuracy))
print("Mean Absolute Error (MAE):", mae)
print("Mean Squared Error (MSE):", mse)
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(X_test, y_test, color='blue', label='Actual', alpha=0.5)
plt.xlabel('Radius Mean')
plt.ylabel('Diagnosis (0: Benign, 1: Malignant)')
plt.title('Data Points')
plt.legend()
plt.subplot(1, 2, 2)
plt.scatter(X_test, y_test, color='blue', label='Actual', alpha=0.5)
x_range = np.linspace(X[:, 0].min(), X[:, 0].max(), 300).reshape(-1, 1)
y_probs = sigmoid(np.dot(x_range, model.weights) + model.bias)  # plot probabilities, not hard labels
plt.plot(x_range, y_probs, color='red', label='Logistic Regression Curve')
plt.xlabel('Radius Mean')
plt.ylabel('Probability of Malignancy')
plt.title('Logistic Regression Fit')
plt.ylim(-0.1, 1.1)
plt.legend()
plt.tight_layout()
plt.show()
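To validate the scratch implementation, it can be compared with sklearn on the same split; note that sklearn applies L2 regularization by default, so the coefficients will differ somewhat:
from sklearn.linear_model import LogisticRegression
sk = LogisticRegression().fit(X_train, y_train)
print("sklearn accuracy: {:.2f}%".format(sk.score(X_test, y_test) * 100))
print("sklearn weight  :", sk.coef_, "bias:", sk.intercept_)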
6 SOM
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from minisom import MiniSom
from sklearn.preprocessing import MinMaxScaler
dataset = pd.read_csv('irissom.csv')
print(dataset.head())
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)
som = MiniSom(x=5, y=5, input_len=X_scaled.shape[1], sigma=1.0, learning_rate=0.5)
som.train(X_scaled, num_iteration=100)
plt.figure(figsize=(10, 8))
plt.pcolor(som.distance_map().T, cmap='Blues')
plt.colorbar()
colors = {'Iris-setosa': 'red', 'Iris-versicolor': 'green', 'Iris-virginica': 'blue'}
for i, x in enumerate(X_scaled):
    win = som.winner(x)  # best matching unit for this sample
    plt.text(win[0] + 0.5, win[1] + 0.5, y[i], color=colors[y[i]], ha='center', va='center')
plt.title('Self-Organizing Map (SOM) of the Iris Dataset')
plt.show()
plt.figure(figsize=(10, 8))
for i, x in enumerate(X_scaled):
    win = som.winner(x)
    plt.scatter(win[0], win[1], marker='o', c=colors[y[i]])
plt.title('Iris Dataset Clusters via SOM')
plt.xlabel('SOM Grid X')
plt.ylabel('SOM Grid Y')
plt.show()
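As a quick quality check, MiniSom's quantization_error reports the average distance between each sample and its best matching unit on the grid:
print("Quantization error:", som.quantization_error(X_scaled))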
7 KNN
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from matplotlib.colors import ListedColormap
dataset = pd.read_csv('irissom.csv')
print(dataset.head())
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_2D = X_scaled[:, :2]
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.3, random_state=42)
def evaluate_knn(k, metric):
    print(f"\nEvaluating KNN with k={k} and metric={metric}")
    knn = KNeighborsClassifier(n_neighbors=k, metric=metric)
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")
    # Decision surface over the first two (standardized) features;
    # the remaining two features are held fixed at 0 (their post-scaling mean).
    x_min, x_max = X_2D[:, 0].min() - 1, X_2D[:, 0].max() + 1
    y_min, y_max = X_2D[:, 1].min() - 1, X_2D[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))
    Z = knn.predict(np.c_[xx.ravel(), yy.ravel(), np.zeros_like(xx.ravel()), np.zeros_like(xx.ravel())])
    Z = Z.reshape(xx.shape)
    plt.figure(figsize=(10, 8))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap=ListedColormap(['red', 'green', 'blue']))
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred, edgecolors='k', marker='o', cmap=ListedColormap(['red', 'green', 'blue']))
    plt.title(f"KNN (k={k}, {metric} distance) - Test Data Classification")
    plt.xlabel('Sepal Length (standardized)')
    plt.ylabel('Sepal Width (standardized)')
    plt.show()
plt.figure(figsize=(10, 8))
plt.scatter(X_2D[:, 0], X_2D[:, 1], c=y_encoded, cmap=ListedColormap(['red', 'green', 'blue']))
plt.title('Scatter plot of Iris Dataset (Sepal Length vs Sepal Width)')
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')
plt.show()
for k in [5, 10]:
    for metric in ['euclidean', 'cosine']:
        evaluate_knn(k, metric)
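k=5 and k=10 are tried by hand above; a short sketch that scans odd k values with 5-fold cross-validation on the training set picks k more systematically:
from sklearn.model_selection import cross_val_score
for k in range(1, 16, 2):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), X_train, y_train, cv=5)
    print(f"k={k:2d}  mean CV accuracy={scores.mean():.4f}")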
8 MLP
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
import seaborn as sns
from matplotlib import pyplot
dataset_init = pd.read_csv('irissom.csv')
print(dataset_init.head())
dataset_init[['sepal length', 'sepal width', 'petal length', 'petal width']] = dataset_init[['sepal length', 'sepal width', 'petal length', 'petal width']].apply(pd.to_numeric, errors='coerce')
print(dataset_init.isnull().sum())
dataset_init = dataset_init.dropna()
X = dataset_init[['sepal length', 'sepal width', 'petal length', 'petal width']].values
Y = dataset_init['species'].values
x_train, x_val, y_train, y_val = train_test_split(X, Y, test_size=0.3, random_state=0)
mlp = MLPClassifier(hidden_layer_sizes=(100,), activation='relu', solver='adam', alpha=0.0001, max_iter=800)
mlp.fit(x_train, y_train)
predictions_train = mlp.predict(x_train)
accuracy_train = mlp.score(x_train, y_train)
print("Training set accuracy:", accuracy_train)
predictions_val = mlp.predict(x_val)
accuracy_val = mlp.score(x_val, y_val)
print("Validation set accuracy:", accuracy_val)
pred = mlp.predict(X)
cm = confusion_matrix(Y, pred, labels=["Iris-setosa", "Iris-versicolor", "Iris-virginica"])
df_cm = pd.DataFrame(cm, index=range(3), columns=range(3))
sns.set(font_scale=1.4)
sns.heatmap(df_cm, annot=True, annot_kws={"size": 20})
pyplot.xlabel("Predicted Output")
pyplot.ylabel("True Output")
pyplot.show()
dataset_init.hist()
pyplot.show()
from pandas.plotting import scatter_matrix
scatter_matrix(dataset_init)
pyplot.show()
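Since the solver is 'adam', MLPClassifier records the training loss per iteration in loss_curve_; plotting it is a quick convergence check:
pyplot.plot(mlp.loss_curve_)
pyplot.xlabel('Iteration')
pyplot.ylabel('Training loss')
pyplot.title('MLP Training Loss Curve')
pyplot.show()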