Lab 7: Customer Segmentation with K-Means Clustering
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# Step 2: Load Data
# Replace 'your_file.csv' with your dataset file
data = pd.read_csv('your_file.csv')
# Preview the dataset
print(data.head())
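# Optional check (a minimal sketch, not part of the original lab): confirm column names,
# dtypes, and missing values before selecting features. The feature columns used below
# assume the common "Mall Customers" layout referenced in Step 3.
data.info()
print(data.isnull().sum())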
# Step 3: Data Preprocessing
# Select relevant features for segmentation (example: income and spending score)
features = data[['Annual Income (k$)', 'Spending Score (1-100)']]
# Scale the data
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)
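# Optional sanity check (a quick sketch): after StandardScaler, each column should have
# a mean close to 0 and a standard deviation close to 1.
print(scaled_features.mean(axis=0).round(3))
print(scaled_features.std(axis=0).round(3))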
# Step 4: Determine the Optimal Number of Clusters
inertia = []
K = range(1, 11)
for k in K:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(scaled_features)
    inertia.append(kmeans.inertia_)
# Plot the Elbow Curve
plt.figure(figsize=(8, 5))
plt.plot(K, inertia, 'bx-')
plt.xlabel('Number of Clusters (K)')
plt.ylabel('Inertia')
plt.title('The Elbow Method')
plt.show()
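# Optional: the silhouette score gives a second opinion on the choice of K
# (a sketch added here, not part of the original lab). Higher is better, and the
# silhouette score is only defined for K >= 2.
from sklearn.metrics import silhouette_score
for k in range(2, 11):
    labels = KMeans(n_clusters=k, random_state=42).fit_predict(scaled_features)
    print(f'K={k}: silhouette score = {silhouette_score(scaled_features, labels):.3f}')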
# Step 5: Apply K-Means Clustering
# Choose an appropriate K (e.g., from the elbow curve)
optimal_k = 4 # Example
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
clusters = kmeans.fit_predict(scaled_features)
# Add cluster labels to the original data
data['Cluster'] = clusters
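# Optional: profile each segment in the original (unscaled) units to make the clusters
# easier to interpret. The column names below assume the same features selected in Step 3.
cluster_profile = data.groupby('Cluster')[['Annual Income (k$)', 'Spending Score (1-100)']].agg(['mean', 'count'])
print(cluster_profile)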
# Step 6: Visualize the Results
plt.figure(figsize=(8, 6))
plt.scatter(scaled_features[:, 0], scaled_features[:, 1], c=clusters, cmap='viridis', alpha=0.6)
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], c='red', s=200, marker='X') # Cluster centers
plt.xlabel('Annual Income (scaled)')
plt.ylabel('Spending Score (scaled)')
plt.title('Customer Segments')
plt.show()
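# Optional: convert the cluster centers back to the original feature scale so they can be
# read as actual income and spending-score values (a sketch using the fitted scaler).
centers_original = scaler.inverse_transform(kmeans.cluster_centers_)
print(pd.DataFrame(centers_original, columns=['Annual Income (k$)', 'Spending Score (1-100)']))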
# Save the segmented data
data.to_csv('segmented_customers.csv', index=False)