Lab 7: Customer Segmentation Using K-Means Clustering

# Step 1: Import Libraries

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from sklearn.cluster import KMeans

from sklearn.preprocessing import StandardScaler


# Step 2: Load Data

# Replace 'your_file.csv' with your dataset file

data = pd.read_csv('your_file.csv')


# Preview the dataset

print(data.head())
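
# Optional: K-Means cannot handle missing values, so it is worth checking for them
# before selecting features. The dropna() call below is only a sketch; your dataset
# may need a different cleaning strategy (e.g., imputation).
print(data.isnull().sum())
data = data.dropna()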


# Step 3: Data Preprocessing

# Select relevant features for segmentation (example: income and spending score)

features = data[['Annual Income (k$)', 'Spending Score (1-100)']]


# Scale the data

scaler = StandardScaler()

scaled_features = scaler.fit_transform(features)
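
# Optional sanity check: after StandardScaler, each feature should have a mean of
# roughly 0 and a standard deviation of roughly 1.
print(scaled_features.mean(axis=0), scaled_features.std(axis=0))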


# Step 4: Determine the Optimal Number of Clusters

inertia = []

K = range(1, 11)

for k in K:

    kmeans = KMeans(n_clusters=k, random_state=42, n_init=10)  # n_init set explicitly for consistent behavior across sklearn versions

    kmeans.fit(scaled_features)

    inertia.append(kmeans.inertia_)


# Plot the Elbow Curve

plt.figure(figsize=(8, 5))

plt.plot(K, inertia, 'bx-')

plt.xlabel('Number of Clusters (K)')

plt.ylabel('Inertia')

plt.title('The Elbow Method')

plt.show()
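
# Optional: the silhouette score is another way to compare candidate values of K.
# Higher scores indicate better-separated clusters. This sketch reuses the
# scaled_features array from Step 3; K starts at 2 because silhouette_score
# requires at least two clusters.
from sklearn.metrics import silhouette_score

for k in range(2, 11):
    labels = KMeans(n_clusters=k, random_state=42, n_init=10).fit_predict(scaled_features)
    print(f'K={k}: silhouette score = {silhouette_score(scaled_features, labels):.3f}')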


# Step 5: Apply K-Means Clustering

# Choose an appropriate K (e.g., from the elbow curve)

optimal_k = 4  # Example

kmeans = KMeans(n_clusters=optimal_k, random_state=42, n_init=10)

clusters = kmeans.fit_predict(scaled_features)


# Add cluster labels to the original data

data['Cluster'] = clusters
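
# Optional: profile each segment by averaging the original (unscaled) features.
# The column names below are the same features selected in Step 3.
print(data.groupby('Cluster')[['Annual Income (k$)', 'Spending Score (1-100)']].mean())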


# Step 6: Visualize the Results

plt.figure(figsize=(8, 6))

plt.scatter(scaled_features[:, 0], scaled_features[:, 1], c=clusters, cmap='viridis', alpha=0.6)

plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], c='red', s=200, marker='X')  # Cluster centers

plt.xlabel('Annual Income (scaled)')

plt.ylabel('Spending Score (scaled)')

plt.title('Customer Segments')

plt.show()
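
# Optional: convert the cluster centers back to the original units so they are
# easier to interpret. This sketch reuses the scaler fitted in Step 3.
centers_original = scaler.inverse_transform(kmeans.cluster_centers_)
print(pd.DataFrame(centers_original, columns=['Annual Income (k$)', 'Spending Score (1-100)']))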


# Step 7: Save the Segmented Data

data.to_csv('segmented_customers.csv', index=False)
